diff --git a/llvm/docs/CommandGuide/llvm-objcopy.rst b/llvm/docs/CommandGuide/llvm-objcopy.rst --- a/llvm/docs/CommandGuide/llvm-objcopy.rst +++ b/llvm/docs/CommandGuide/llvm-objcopy.rst @@ -142,6 +142,30 @@ Read command-line options and commands from response file `<FILE>`. +.. option:: --wildcard, -w + + Allow wildcard syntax for symbol-related flags. On by default for + section-related flags. Incompatible with --regex. + + Wildcard syntax allows the following special symbols: + + ====================== ========================= ================== + Character Meaning Equivalent + ====================== ========================= ================== + ``*`` Any number of characters ``.*`` + ``?`` Any single character ``.`` + ``\`` Escape the next character ``\`` + ``[a-z]`` Character class ``[a-z]`` + ``[!a-z]``, ``[^a-z]`` Negated character class ``[^a-z]`` + ====================== ========================= ================== + + Additionally, starting a wildcard with '!' will prevent a match, even if + another flag matches. For example ``-w -N '*' -N '!x'`` will strip all symbols + except for ``x``. + + The order of wildcards does not matter. For example, ``-w -N '*' -N '!x'`` is + the same as ``-w -N '!x' -N '*'``. + COFF-SPECIFIC OPTIONS --------------------- diff --git a/llvm/docs/CommandGuide/llvm-strip.rst b/llvm/docs/CommandGuide/llvm-strip.rst --- a/llvm/docs/CommandGuide/llvm-strip.rst +++ b/llvm/docs/CommandGuide/llvm-strip.rst @@ -104,6 +104,30 @@ Read command-line options and commands from response file `<FILE>`. +.. option:: --wildcard, -w + + Allow wildcard syntax for symbol-related flags. On by default for + section-related flags. Incompatible with --regex. 
+ + Wildcard syntax allows the following special symbols: + + ====================== ========================= ================== + Character Meaning Equivalent + ====================== ========================= ================== + ``*`` Any number of characters ``.*`` + ``?`` Any single character ``.`` + ``\`` Escape the next character ``\`` + ``[a-z]`` Character class ``[a-z]`` + ``[!a-z]``, ``[^a-z]`` Negated character class ``[^a-z]`` + ====================== ========================= ================== + + Additionally, starting a wildcard with '!' will prevent a match, even if + another flag matches. For example ``-w -N '*' -N '!x'`` will strip all symbols + except for ``x``. + + The order of wildcards does not matter. For example, ``-w -N '*' -N '!x'`` is + the same as ``-w -N '!x' -N '*'``. + COFF-SPECIFIC OPTIONS --------------------- diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -21,7 +21,7 @@ #include // This class represents a glob pattern. Supported metacharacters -// are "*", "?", "[]" and "[^]". +// are "*", "?", "\", "[]", "[^]", and "[!]". namespace llvm { class BitVector; template class ArrayRef; diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -19,7 +19,7 @@ using namespace llvm; static bool hasWildcard(StringRef S) { - return S.find_first_of("?*[") != StringRef::npos; + return S.find_first_of("?*[\\") != StringRef::npos; } // Expands character ranges and returns a bitmap. @@ -60,8 +60,9 @@ } // This is a scanner for the glob pattern. -// A glob pattern token is one of "*", "?", "[]", "[^]" -// (which is a negative form of "[]"), or a non-meta character. 
+// A glob pattern token is one of "*", "?", "\", "[]", "[^]" +// (which is a negative form of "[]"), "[!]" (which is +// equivalent to "[^]"), or a non-meta character. // This function returns the first token in S. static Expected scan(StringRef &S, StringRef Original) { switch (S[0]) { @@ -74,14 +75,16 @@ S = S.substr(1); return BitVector(256, true); case '[': { - size_t End = S.find(']', 1); + // ']' is allowed as the first character of a character class. '[]' is + // invalid. So, just skip the first character. + size_t End = S.find(']', 2); if (End == StringRef::npos) return make_error("invalid glob pattern: " + Original, errc::invalid_argument); StringRef Chars = S.substr(1, End - 1); S = S.substr(End + 1); - if (Chars.startswith("^")) { + if (Chars.startswith("^") || Chars.startswith("!")) { Expected BV = expand(Chars.substr(1), Original); if (!BV) return BV.takeError(); @@ -89,6 +92,11 @@ } return expand(Chars, Original); } + case '\\': + // Eat this character and fall through below to treat it like a non-meta + // character. + S = S.substr(1); + LLVM_FALLTHROUGH; default: BitVector BV(256, false); BV[(uint8_t)S[0]] = true; @@ -107,8 +115,9 @@ return Pat; } - // S is something like "foo*". We can use startswith(). - if (S.endswith("*") && !hasWildcard(S.drop_back())) { + // S is something like "foo*", and the "*" is not escaped. We can use + // startswith(). + if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { Pat.Prefix = S.drop_back(); return Pat; } diff --git a/llvm/test/tools/llvm-objcopy/ELF/wildcard-flags.test b/llvm/test/tools/llvm-objcopy/ELF/wildcard-flags.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/wildcard-flags.test @@ -0,0 +1,162 @@ +## This test checks basic functionality of glob (or "shell wildcard") matching, +## as well as verifying all the relevant flags in llvm-objcopy and llvm-strip +## are configured correctly. +## For more detailed syntax tests, see wildcard-syntax.test. 
+ +# RUN: yaml2obj %s > %t.o + +## Check that --regex and --wildcard cannot be used together. +# RUN: not llvm-objcopy --regex --wildcard %t.o %t.err.o 2>&1 \ +# RUN: | FileCheck %s --check-prefix=ERR +# RUN: not llvm-strip --regex --wildcard %t.o -o %t.err.o 2>&1 \ +# RUN: | FileCheck %s --check-prefix=ERR + +# ERR: error: --regex and --wildcard are incompatible + +## Check that section removal flags default to glob matches. + +## --keep-section: +# RUN: llvm-objcopy --strip-all --keep-section='.f*' %t.o %t.ksec.1.o +# RUN: llvm-readobj --sections %t.ksec.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,SEC,FOO-SEC +# RUN: llvm-strip --strip-all --keep-section='.f*' %t.o -o %t.ksec.2.o +# RUN: cmp %t.ksec.1.o %t.ksec.2.o + +## --only-section: +# RUN: llvm-objcopy --strip-all --only-section='.f*' %t.o %t.osec.1.o +# RUN: llvm-readobj --sections %t.osec.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,SEC,FOO-SEC + +## --remove-section: +# RUN: llvm-objcopy --strip-debug --remove-section='.s??tab' %t.o %t.rsec.1.o +# RUN: llvm-readobj --sections %t.rsec.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,SEC,FOO-SEC,BAR-SEC +# RUN: llvm-strip --strip-debug --remove-section='.s??tab' %t.o -o %t.rsec.2.o +# RUN: cmp %t.rsec.1.o %t.rsec.2.o + +## Check that symbol removal options default to literal matches. Adding -w +## enables glob support for these options. 
+ +## --globalize-symbol: +# RUN: llvm-objcopy --globalize-symbol='*' %t.o %t.globsym.1.o +# RUN: llvm-readobj --symbols %t.globsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,LOCAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --globalize-symbol='*' %t.o %t.globsym.2.o +# RUN: llvm-readobj --symbols %t.globsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +## --keep-symbol: +# RUN: llvm-objcopy --discard-all --keep-symbol='f*' %t.o %t.ksym.1.o +# RUN: llvm-readobj --symbols %t.ksym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK +# RUN: llvm-strip --discard-all --keep-symbol='f*' %t.o -o %t.ksym.2.o +# RUN: cmp %t.ksym.1.o %t.ksym.2.o + +# RUN: llvm-objcopy --discard-all -w --keep-symbol='f*' %t.o %t.ksym.3.o +# RUN: llvm-readobj --symbols %t.ksym.3.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO-SYM +# RUN: llvm-strip --discard-all -w --keep-symbol='f*' %t.o -o %t.ksym.4.o +# RUN: cmp %t.ksym.3.o %t.ksym.4.o + +## --localize-symbol: +## Note: Use %t.globsym.2.o instead of %t.o since those symbols are global. 
+# RUN: llvm-objcopy --localize-symbol='*' %t.globsym.2.o %t.localsym.1.o +# RUN: llvm-readobj --symbols %t.localsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --localize-symbol='*' %t.globsym.2.o %t.localsym.2.o +# RUN: llvm-readobj --symbols %t.localsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,LOCAL,FOO-SYM,BAR-SYM + +## --strip-symbol: +# RUN: llvm-objcopy --strip-symbol='f*' %t.o %t.stripsym.1.o +# RUN: llvm-readobj --symbols %t.stripsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO-SYM,BAR-SYM +# RUN: llvm-strip --strip-symbol='f*' %t.o -o %t.stripsym.2.o +# RUN: cmp %t.stripsym.1.o %t.stripsym.2.o + +# RUN: llvm-objcopy -w --strip-symbol='f*' %t.o %t.stripsym.3.o +# RUN: llvm-readobj --symbols %t.stripsym.3.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR-SYM +# RUN: llvm-strip -w --strip-symbol='f*' %t.o -o %t.stripsym.4.o +# RUN: cmp %t.stripsym.3.o %t.stripsym.4.o + +## --strip-unneeded-symbol: +# RUN: llvm-objcopy --strip-unneeded-symbol='f*' %t.o %t.stripunsym.1.o +# RUN: llvm-readobj --symbols %t.stripunsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --strip-unneeded-symbol='f*' %t.o %t.stripunsym.2.o +# RUN: llvm-readobj --symbols %t.stripunsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR-SYM + +## --weaken-symbol: +## Note: Use %t.globsym.2.o instead of %t.o since those symbols are global. 
+# RUN: llvm-objcopy --weaken-symbol='*' %t.globsym.2.o %t.weaksym.1.o +# RUN: llvm-readobj --symbols %t.weaksym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --weaken-symbol='*' %t.globsym.2.o %t.weaksym.2.o +# RUN: llvm-readobj --symbols %t.weaksym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,WEAK,FOO-SYM,BAR-SYM + +## --keep-global-symbol: +## Note: Use %t.globsym.2.o instead of %t.o since those symbols are global. +# RUN: llvm-objcopy --keep-global-symbol='*' %t.globsym.2.o %t.keepgsym.1.o +# RUN: llvm-readobj --symbols %t.keepgsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,LOCAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --keep-global-symbol='*' %t.globsym.2.o %t.keepgsym.2.o +# RUN: llvm-readobj --symbols %t.keepgsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +## Check that -w is accepted as an alias for --wildcard. 
+# RUN: llvm-objcopy --wildcard --keep-global-symbol='*' %t.globsym.2.o %t.keepgsym.3.o +# RUN: cmp %t.keepgsym.2.o %t.keepgsym.3.o + +# CHECK: LoadName: +# CHECK: Name: (0) + +# FOO-SEC: Name: .foo + +# FOO-SYM: Name: foo +# GLOBAL: Binding: Global +# WEAK: Binding: Weak +# LOCAL: Binding: Local + +# BAR-SEC: Name: .bar +# BAR-SYM: Name: bar +# GLOBAL: Binding: Global +# WEAK: Binding: Weak +# LOCAL: Binding: Local + +# SEC: Name: .shstrtab + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .foo + Type: SHT_PROGBITS + - Name: .bar + Type: SHT_PROGBITS +Symbols: + - Name: foo + Type: STT_FUNC + Section: .foo + - Name: bar + Type: STT_FUNC + Section: .foo diff --git a/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test b/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test @@ -0,0 +1,149 @@ +## This test checks that llvm-objcopy accepts glob (or "shell wildcard") syntax +## for the --wildcard (-w) flag correctly. + +# RUN: yaml2obj --docnum=1 %s > %t.o + +## * matches all characters. +# RUN: llvm-objcopy --remove-section='.f*' %t.o %t.glob.o +# RUN: llvm-readobj --sections %t.glob.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR + +## Glob matches are full matches. ("*a" does not match ".bar"). +# RUN: llvm-objcopy --remove-section='*a' %t.o %t.full.o +# RUN: llvm-readobj --sections %t.full.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO,BAR + +## ? matches one character. +# RUN: llvm-objcopy --remove-section='.b?r' %t.o %t.question.o +# RUN: llvm-readobj --sections %t.question.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO + +## ! (as a leading character) prevents matches, and is not dependent on +## ordering. +# RUN: llvm-objcopy --remove-section='.???' 
--remove-section='!.f*' \ +# RUN: %t.o %t.negmatch1.o +# RUN: llvm-readobj --sections %t.negmatch1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO +# RUN: llvm-objcopy --remove-section='!.f*' --remove-section='.???' \ +# RUN: %t.o %t.negmatch2.o +# RUN: llvm-readobj --sections %t.negmatch2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO +# RUN: llvm-objcopy --remove-section='.???' --remove-section='!.f*' \ +# RUN: --remove-section='.???' %t.o %t.negmatch3.o +# RUN: llvm-readobj --sections %t.negmatch3.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO + +## [a-z] matches a range of characters. +# RUN: llvm-objcopy --remove-section='.[a-c][a-a][q-s]' %t.o %t.range.o +# RUN: llvm-readobj --sections %t.range.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO + +## [^a-z] or [!a-z] match a negated range of characters. +# RUN: llvm-objcopy --remove-section='.[^x]oo' %t.o %t.negrange.1.o +# RUN: llvm-readobj --sections %t.negrange.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR +# RUN: llvm-objcopy --remove-section='.[!x]oo' %t.o %t.negrange.2.o +# RUN: llvm-readobj --sections %t.negrange.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .foo + Type: SHT_PROGBITS + - Name: .bar + Type: SHT_PROGBITS + +## Use a separate test file with special characters for the following tests. + +# RUN: yaml2obj --docnum=2 %s > %t.special.o + +## \ escapes wildcard characters. +# RUN: llvm-objcopy --remove-section='\*' %t.special.o %t.escape.1.o +# RUN: llvm-readobj --sections %t.escape.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,DOT,QUESTION,LEFT-BRACKET,RIGHT-BRACKET,INVALID-GLOB,Z,XYZ,FOO +# RUN: llvm-objcopy --remove-section='\?' 
%t.special.o %t.escape.2.o +# RUN: llvm-readobj --sections %t.escape.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,DOT,ASTERISK,LEFT-BRACKET,RIGHT-BRACKET,INVALID-GLOB,Z,XYZ,FOO + +## Special characters are not treated like regular expression characters. +# RUN: llvm-objcopy --remove-section='.' %t.special.o %t.dot.o +# RUN: llvm-readobj --sections %t.dot.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,ASTERISK,QUESTION,LEFT-BRACKET,RIGHT-BRACKET,INVALID-GLOB,Z,XYZ,FOO + +## Special characters in character classes are treated literally. +## [*] should not get expanded to [.*], which would match both '.' and '*' +# RUN: llvm-objcopy --remove-section='[*]' %t.special.o %t.class.1.o +# RUN: llvm-readobj --sections %t.class.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,DOT,QUESTION,LEFT-BRACKET,RIGHT-BRACKET,INVALID-GLOB,Z,XYZ,FOO + +## ] doesn't close the character class as a first character. This glob matches +## a single character which is one of ']xyz'. ']' and 'z' are removed, and more explicitly, +## section 'xyz]' is not removed, i.e. the glob is not interpreted as "an empty +## character class followed by 'xyz]'" +# RUN: llvm-objcopy --remove-section='[]xyz]' %t.special.o %t.class.2.o +# RUN: llvm-readobj --sections %t.class.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,DOT,ASTERISK,QUESTION,LEFT-BRACKET,INVALID-GLOB,XYZ,FOO + +## An invalid glob expression is interpreted as a literal instead. 
+# RUN: llvm-objcopy --remove-section='][]' %t.special.o %t.class.3.o 2>&1 \ +# RUN: | FileCheck %s --check-prefix=WARN +# RUN: llvm-readobj --sections %t.class.3.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,DOT,ASTERISK,QUESTION,LEFT-BRACKET,RIGHT-BRACKET,Z,XYZ,FOO + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: . + Type: SHT_PROGBITS + - Name: '*' + Type: SHT_PROGBITS + - Name: '?' + Type: SHT_PROGBITS + - Name: '[' + Type: SHT_PROGBITS + - Name: ']' + Type: SHT_PROGBITS + - Name: '][]' + Type: SHT_PROGBITS + - Name: z + Type: SHT_PROGBITS + - Name: 'xyz]' + Type: SHT_PROGBITS + - Name: '[]xyz]' + Type: SHT_PROGBITS + - Name: .foo + Type: SHT_PROGBITS + +# WARN: warning: invalid glob pattern: ][] + +# CHECK: LoadName: +# CHECK: Name: (0) +# DOT: Name: . +# ASTERISK: Name: * +# QUESTION: Name: ? +# LEFT-BRACKET: Name: [ +# RIGHT-BRACKET: Name: ] +# INVALID-GLOB: Name: ][] +# Z: Name: z +# XYZ: Name: xyz] +# XYZ: Name: []xyz] +# FOO: Name: .foo +# BAR: Name: .bar +# CHECK: Name: .symtab +# CHECK: Name: .strtab +# CHECK: Name: .shstrtab diff --git a/llvm/tools/llvm-objcopy/CommonOpts.td b/llvm/tools/llvm-objcopy/CommonOpts.td --- a/llvm/tools/llvm-objcopy/CommonOpts.td +++ b/llvm/tools/llvm-objcopy/CommonOpts.td @@ -111,3 +111,13 @@ def V : Flag<["-"], "V">, Alias, HelpText<"Alias for --version">; + +def wildcard + : Flag<["--"], "wildcard">, + HelpText<"Allow wildcard syntax for symbol-related flags. Incompatible " + "with --regex. Allows using '*' to match any number of " + "characters, '?' to match any single character, '\' to escape " + "special characters, and '[]' to define character classes. " + "Wildcards beginning with '!' 
will prevent a match, for example " + "\"-N '*' -N '!x'\" will strip all symbols except for \"x\".">; +def w : Flag<["-"], "w">, Alias, HelpText<"Alias for --wildcard">; diff --git a/llvm/tools/llvm-objcopy/CopyConfig.h b/llvm/tools/llvm-objcopy/CopyConfig.h --- a/llvm/tools/llvm-objcopy/CopyConfig.h +++ b/llvm/tools/llvm-objcopy/CopyConfig.h @@ -19,6 +19,7 @@ #include "llvm/Object/ELFTypes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/Regex.h" // Necessary for llvm::DebugCompressionType::None #include "llvm/Target/TargetOptions.h" @@ -88,28 +89,58 @@ Locals, // --discard-locals (-X) }; -class NameOrRegex { +enum class MatchStyle { + Literal, // Default for symbols. + Wildcard, // Default for sections, or enabled with --wildcard (-w). + Regex, // Enabled with --regex. +}; + +class NameOrPattern { StringRef Name; // Regex is shared between multiple CopyConfig instances. std::shared_ptr R; + std::shared_ptr G; + bool IsPositiveMatch = true; + + NameOrPattern(StringRef N) : Name(N) {} + NameOrPattern(std::shared_ptr R) : R(R) {} + NameOrPattern(std::shared_ptr G, bool IsPositiveMatch) + : G(G), IsPositiveMatch(IsPositiveMatch) {} public: - NameOrRegex(StringRef Pattern, bool IsRegex); - bool operator==(StringRef S) const { return R ? R->match(S) : Name == S; } + // ErrorCallback is used to handle recoverable errors. An Error returned + // by the callback aborts the parsing and is then returned by this function. + static Expected + create(StringRef Pattern, MatchStyle MS, + llvm::function_ref ErrorCallback); + + bool isPositiveMatch() const { return IsPositiveMatch; } + bool operator==(StringRef S) const { + return R ? R->match(S) : G ? G->match(S) : Name == S; + } bool operator!=(StringRef S) const { return !operator==(S); } }; // Matcher that checks symbol or section names against the command line flags // provided for that option. 
class NameMatcher { - std::vector Matchers; + std::vector PosMatchers; + std::vector NegMatchers; public: - void addMatcher(NameOrRegex Matcher) { - Matchers.push_back(std::move(Matcher)); + Error addMatcher(Expected Matcher) { + if (!Matcher) + return Matcher.takeError(); + if (Matcher->isPositiveMatch()) + PosMatchers.push_back(std::move(*Matcher)); + else + NegMatchers.push_back(std::move(*Matcher)); + return Error::success(); + } + bool matches(StringRef S) const { + return is_contained(PosMatchers, S) && !is_contained(NegMatchers, S); } - bool matches(StringRef S) const { return is_contained(Matchers, S); } - bool empty() const { return Matchers.empty(); } + bool empty() const { return PosMatchers.empty() && NegMatchers.empty(); } }; // Configuration for copying/stripping a single file. @@ -214,8 +245,11 @@ // ParseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then ParseObjcopyOptions will print the help messege and -// exit. -Expected parseObjcopyOptions(ArrayRef ArgsArr); +// exit. ErrorCallback is used to handle recoverable errors. An Error returned +// by the callback aborts the parsing and is then returned by this function. +Expected +parseObjcopyOptions(ArrayRef ArgsArr, + llvm::function_ref ErrorCallback); // ParseStripOptions returns the config and sets the input arguments. If a // help flag is set then ParseStripOptions will print the help messege and @@ -223,7 +257,7 @@ // by the callback aborts the parsing and is then returned by this function. 
Expected parseStripOptions(ArrayRef ArgsArr, - std::function ErrorCallback); + llvm::function_ref ErrorCallback); } // namespace objcopy } // namespace llvm diff --git a/llvm/tools/llvm-objcopy/CopyConfig.cpp b/llvm/tools/llvm-objcopy/CopyConfig.cpp --- a/llvm/tools/llvm-objcopy/CopyConfig.cpp +++ b/llvm/tools/llvm-objcopy/CopyConfig.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "CopyConfig.h" +#include "llvm-objcopy.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" @@ -260,8 +261,10 @@ return {TargetInfo{Format, MI}}; } -static Error addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc, - StringRef Filename, bool UseRegex) { +static Error +addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc, + StringRef Filename, MatchStyle MS, + llvm::function_ref ErrorCallback) { StringSaver Saver(Alloc); SmallVector Lines; auto BufOrErr = MemoryBuffer::getFile(Filename); @@ -274,21 +277,46 @@ // it's not empty. auto TrimmedLine = Line.split('#').first.trim(); if (!TrimmedLine.empty()) - Symbols.addMatcher({Saver.save(TrimmedLine), UseRegex}); + if (Error E = Symbols.addMatcher(NameOrPattern::create( + Saver.save(TrimmedLine), MS, ErrorCallback))) + return E; } return Error::success(); } -NameOrRegex::NameOrRegex(StringRef Pattern, bool IsRegex) { - if (!IsRegex) { - Name = Pattern; - return; - } +Expected +NameOrPattern::create(StringRef Pattern, MatchStyle MS, + llvm::function_ref ErrorCallback) { + switch (MS) { + case MatchStyle::Literal: + return NameOrPattern(Pattern); + case MatchStyle::Wildcard: { + SmallVector Data; + bool IsPositiveMatch = true; + if (Pattern[0] == '!') { + IsPositiveMatch = false; + Pattern = Pattern.drop_front(); + } + Expected GlobOrErr = GlobPattern::create(Pattern); + + // If we couldn't create it as a glob, report the error, but try again with + // a literal if the error reporting is non-fatal. 
+ if (!GlobOrErr) { + if (Error E = ErrorCallback(GlobOrErr.takeError())) + return std::move(E); + return create(Pattern, MatchStyle::Literal, ErrorCallback); + } - SmallVector Data; - R = std::make_shared( - ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)); + return NameOrPattern(std::make_shared(*GlobOrErr), + IsPositiveMatch); + } + case MatchStyle::Regex: { + SmallVector Data; + return NameOrPattern(std::make_shared( + ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data))); + } + } } static Error addSymbolsToRenameFromFile(StringMap &SymbolsToRename, @@ -338,7 +366,9 @@ // ParseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then ParseObjcopyOptions will print the help messege and // exit. -Expected parseObjcopyOptions(ArrayRef ArgsArr) { +Expected +parseObjcopyOptions(ArrayRef ArgsArr, + llvm::function_ref ErrorCallback) { DriverConfig DC; ObjcopyOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; @@ -387,7 +417,18 @@ errc::invalid_argument, "--target cannot be used with --input-target or --output-target"); - bool UseRegex = InputArgs.hasArg(OBJCOPY_regex); + if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + + MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex) + ? MatchStyle::Regex + : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex) + ? MatchStyle::Regex + : InputArgs.hasArg(OBJCOPY_wildcard) + ? 
MatchStyle::Wildcard + : MatchStyle::Literal; StringRef InputFormat, OutputFormat; if (InputArgs.hasArg(OBJCOPY_target)) { InputFormat = InputArgs.getLastArgValue(OBJCOPY_target); @@ -541,11 +582,17 @@ } for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section)) - Config.ToRemove.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section)) - Config.KeepSection.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_only_section)) - Config.OnlySection.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.OnlySection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) { StringRef ArgValue(Arg->getValue()); if (!ArgValue.contains('=')) @@ -583,46 +630,68 @@ if (Config.DiscardMode == DiscardType::All) Config.StripDebug = true; for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) - Config.SymbolsToLocalize.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol)) - Config.SymbolsToKeepGlobal.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.SymbolsToKeepGlobal.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, 
ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol)) - Config.SymbolsToGlobalize.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.SymbolsToGlobalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol)) - Config.SymbolsToWeaken.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.SymbolsToWeaken.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol)) - Config.SymbolsToRemove.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol)) - Config.UnneededSymbolsToRemove.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = + 
Config.UnneededSymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols)) if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol)) - Config.SymbolsToKeep.addMatcher({Arg->getValue(), UseRegex}); + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) - if (Error E = addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, - Arg->getValue(), UseRegex)) + if (Error E = + addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, Arg->getValue(), + SymbolMatchStyle, ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) Config.SymbolsToAdd.push_back(Arg->getValue()); @@ -688,7 +757,7 @@ // exit. Expected parseStripOptions(ArrayRef ArgsArr, - std::function ErrorCallback) { + llvm::function_ref ErrorCallback) { StripOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; llvm::opt::InputArgList InputArgs = @@ -726,7 +795,17 @@ "multiple input files cannot be used in combination with -o"); CopyConfig Config; - bool UseRegexp = InputArgs.hasArg(STRIP_regex); + + if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + MatchStyle SectionMatchStyle = + InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex) + ? MatchStyle::Regex + : InputArgs.hasArg(STRIP_wildcard) + ? 
MatchStyle::Wildcard + : MatchStyle::Literal; Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links); Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug); @@ -744,16 +823,24 @@ Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); for (auto Arg : InputArgs.filtered(STRIP_keep_section)) - Config.KeepSection.addMatcher({Arg->getValue(), UseRegexp}); + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_remove_section)) - Config.ToRemove.addMatcher({Arg->getValue(), UseRegexp}); + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_strip_symbol)) - Config.SymbolsToRemove.addMatcher({Arg->getValue(), UseRegexp}); + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_keep_symbol)) - Config.SymbolsToKeep.addMatcher({Arg->getValue(), UseRegexp}); + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug && !Config.StripUnneeded && Config.DiscardMode == DiscardType::None && diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp --- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -335,7 +335,7 @@ Expected DriverConfig = IsStrip ? 
parseStripOptions(Args, reportWarning) - : parseObjcopyOptions(Args); + : parseObjcopyOptions(Args, reportWarning); if (!DriverConfig) { logAllUnhandledErrors(DriverConfig.takeError(), WithColor::error(errs(), ToolName)); diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -14,57 +14,115 @@ class GlobPatternTest : public ::testing::Test {}; -TEST_F(GlobPatternTest, Basics) { +TEST_F(GlobPatternTest, Empty) { Expected<GlobPattern> Pat1 = GlobPattern::create(""); EXPECT_TRUE((bool)Pat1); EXPECT_TRUE(Pat1->match("")); EXPECT_FALSE(Pat1->match("a")); +} + +TEST_F(GlobPatternTest, Glob) { + Expected<GlobPattern> Pat1 = GlobPattern::create("ab*c*def"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("abcdef")); + EXPECT_TRUE(Pat1->match("abxcxdef")); + EXPECT_FALSE(Pat1->match("")); + EXPECT_FALSE(Pat1->match("xabcdef")); + EXPECT_FALSE(Pat1->match("abcdefx")); +} + +TEST_F(GlobPatternTest, Wildcard) { + Expected<GlobPattern> Pat1 = GlobPattern::create("a??c"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("axxc")); + EXPECT_FALSE(Pat1->match("axxx")); + EXPECT_FALSE(Pat1->match("")); +} - Expected<GlobPattern> Pat2 = GlobPattern::create("ab*c*def"); +TEST_F(GlobPatternTest, Escape) { + Expected<GlobPattern> Pat1 = GlobPattern::create("\\*"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("*")); + EXPECT_FALSE(Pat1->match("\\*")); + EXPECT_FALSE(Pat1->match("a")); + + Expected<GlobPattern> Pat2 = GlobPattern::create("a?\\?c"); EXPECT_TRUE((bool)Pat2); - EXPECT_TRUE(Pat2->match("abcdef")); - EXPECT_TRUE(Pat2->match("abxcxdef")); + EXPECT_TRUE(Pat2->match("ax?c")); + EXPECT_FALSE(Pat2->match("axxc")); EXPECT_FALSE(Pat2->match("")); - EXPECT_FALSE(Pat2->match("xabcdef")); - EXPECT_FALSE(Pat2->match("abcdefx")); - - Expected<GlobPattern> Pat3 = GlobPattern::create("a??c"); - EXPECT_TRUE((bool)Pat3); - EXPECT_TRUE(Pat3->match("axxc")); -
EXPECT_FALSE(Pat3->match("axxx")); - EXPECT_FALSE(Pat3->match("")); - - Expected<GlobPattern> Pat4 = GlobPattern::create("[abc-fy-z]"); - EXPECT_TRUE((bool)Pat4); - EXPECT_TRUE(Pat4->match("a")); - EXPECT_TRUE(Pat4->match("b")); - EXPECT_TRUE(Pat4->match("c")); - EXPECT_TRUE(Pat4->match("d")); - EXPECT_TRUE(Pat4->match("e")); - EXPECT_TRUE(Pat4->match("f")); - EXPECT_TRUE(Pat4->match("y")); - EXPECT_TRUE(Pat4->match("z")); - EXPECT_FALSE(Pat4->match("g")); - EXPECT_FALSE(Pat4->match("")); - - Expected<GlobPattern> Pat5 = GlobPattern::create("[^abc-fy-z]"); - EXPECT_TRUE((bool)Pat5); - EXPECT_TRUE(Pat5->match("g")); - EXPECT_FALSE(Pat5->match("a")); - EXPECT_FALSE(Pat5->match("b")); - EXPECT_FALSE(Pat5->match("c")); - EXPECT_FALSE(Pat5->match("d")); - EXPECT_FALSE(Pat5->match("e")); - EXPECT_FALSE(Pat5->match("f")); - EXPECT_FALSE(Pat5->match("y")); - EXPECT_FALSE(Pat5->match("z")); - EXPECT_FALSE(Pat5->match("")); +} + +TEST_F(GlobPatternTest, BasicCharacterClass) { + Expected<GlobPattern> Pat1 = GlobPattern::create("[abc-fy-z]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("a")); + EXPECT_TRUE(Pat1->match("b")); + EXPECT_TRUE(Pat1->match("c")); + EXPECT_TRUE(Pat1->match("d")); + EXPECT_TRUE(Pat1->match("e")); + EXPECT_TRUE(Pat1->match("f")); + EXPECT_TRUE(Pat1->match("y")); + EXPECT_TRUE(Pat1->match("z")); + EXPECT_FALSE(Pat1->match("g")); + EXPECT_FALSE(Pat1->match("")); +} + +TEST_F(GlobPatternTest, NegatedCharacterClass) { + Expected<GlobPattern> Pat1 = GlobPattern::create("[^abc-fy-z]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("g")); + EXPECT_FALSE(Pat1->match("a")); + EXPECT_FALSE(Pat1->match("b")); + EXPECT_FALSE(Pat1->match("c")); + EXPECT_FALSE(Pat1->match("d")); + EXPECT_FALSE(Pat1->match("e")); + EXPECT_FALSE(Pat1->match("f")); + EXPECT_FALSE(Pat1->match("y")); + EXPECT_FALSE(Pat1->match("z")); + EXPECT_FALSE(Pat1->match("")); + + Expected<GlobPattern> Pat2 = GlobPattern::create("[!abc-fy-z]"); + EXPECT_TRUE((bool)Pat2); + EXPECT_TRUE(Pat2->match("g")); + EXPECT_FALSE(Pat2->match("a")); + EXPECT_FALSE(Pat2->match("b")); +
EXPECT_FALSE(Pat2->match("c")); + EXPECT_FALSE(Pat2->match("d")); + EXPECT_FALSE(Pat2->match("e")); + EXPECT_FALSE(Pat2->match("f")); + EXPECT_FALSE(Pat2->match("y")); + EXPECT_FALSE(Pat2->match("z")); + EXPECT_FALSE(Pat2->match("")); +} + +TEST_F(GlobPatternTest, BracketFrontOfCharacterClass) { + Expected<GlobPattern> Pat1 = GlobPattern::create("[]a]x"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("]x")); + EXPECT_TRUE(Pat1->match("ax")); + EXPECT_FALSE(Pat1->match("a]x")); + EXPECT_FALSE(Pat1->match("")); +} + +TEST_F(GlobPatternTest, SpecialCharsInCharacterClass) { + Expected<GlobPattern> Pat1 = GlobPattern::create("[*?^]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("*")); + EXPECT_TRUE(Pat1->match("?")); + EXPECT_TRUE(Pat1->match("^")); + EXPECT_FALSE(Pat1->match("*?^")); + EXPECT_FALSE(Pat1->match("")); } TEST_F(GlobPatternTest, Invalid) { Expected<GlobPattern> Pat1 = GlobPattern::create("["); EXPECT_FALSE((bool)Pat1); handleAllErrors(Pat1.takeError(), [&](ErrorInfoBase &EIB) {}); + + Expected<GlobPattern> Pat2 = GlobPattern::create("[]"); + EXPECT_FALSE((bool)Pat2); + handleAllErrors(Pat2.takeError(), [&](ErrorInfoBase &EIB) {}); } TEST_F(GlobPatternTest, ExtSym) {