diff --git a/llvm/test/Other/lit-quoting.txt b/llvm/test/Other/lit-quoting.txt --- a/llvm/test/Other/lit-quoting.txt +++ b/llvm/test/Other/lit-quoting.txt @@ -1,2 +1,9 @@ -RUN: echo "\"" | FileCheck %s -CHECK: {{^"$}} +RUN: echo "\"" | FileCheck %s --check-prefix=CHECK1 +RUN: echo '"' | FileCheck %s --check-prefix=CHECK1 +RUN: echo 'a[b\c' | FileCheck %s --check-prefix=CHECK2 +RUN: echo "a[b\\c" | FileCheck %s --check-prefix=CHECK2 +RUN: echo 'a\b\\c\\\\d' | FileCheck %s --check-prefix=CHECK3 +RUN: echo "a\\b\\\\c\\\\\\\\d" | FileCheck %s --check-prefix=CHECK3 +CHECK1: {{^"$}} +CHECK2: {{^a\[b\\c$}} +CHECK3: {{^a\\b\\\\c\\\\\\\\d$}} diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -4,6 +4,7 @@ import itertools import getopt import os, signal, subprocess, sys +import pefile import re import stat import platform @@ -177,7 +178,29 @@ result.extend(expand_glob(arg, cwd)) return result -def quote_windows_command(seq): +def _memoize(f): + cache = {} # Intentionally unbounded, see applySubstitutions() + def memoized(x): + if x not in cache: + cache[x] = f(x) + return cache[x] + return memoized + +@_memoize +def _caching_is_msys(exe): + try: + pe = pefile.PE(exe, fast_load=True) + pe.is_driver() # This loads DIRECTORY_ENTRY_IMPORT after fast_load + if hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'): + for imp in pe.DIRECTORY_ENTRY_IMPORT: + dll = imp.dll.decode('utf-8', 'ignore').lower() + if dll == 'msys-2.0.dll': + return True + return False + except: + return False + +def quote_windows_command(seq, executable): """ Reimplement Python's private subprocess.list2cmdline for MSys compatibility @@ -190,11 +213,36 @@ differences. We use the same algorithm from MSDN as CPython - (http://msdn.microsoft.com/en-us/library/17w5ykft.aspx), but we treat more - characters as needing quoting, such as double quotes themselves. 
+ (http://msdn.microsoft.com/en-us/library/17w5ykft.aspx), but we try to + detect if the target executable is an MSys executable, and if it is, + and the argument looks like it contains chars that trigger globbing, + we quote it and double the backslashes in it. + + Unfortunately, it seems to be impossible to quote complex arguments in a + way that both the MSVC CRT and MSys would interpret the same way + consistently. Quoting arguments affects how MSys interprets certain chars + like backslashes. One can also set the env var MSYS=noglob to inhibit + the globbing process (which affects how chars are quoted), but that + breaks a few cases where the globbing handling is required: + + Input MSys app MSys app Regular win32 app + sees with noglob sees sees + 1 a[b\c a[bc a[b\c a[b\c + 2 a\b\\c\\\\d a\b\\c\\\\d a\b\\c\\\\d a\b\\c\\\\d + 3 a\"b a\b a\b a"b + 4 "a[b\c" a[b\c a[b\c a[b\c + 5 "a\b\\c\\\\d" a\b\c\\d a\b\\c\\\\d a\b\\c\\\\d + 6 "a\"b" a"b a\b a"b + + Chars like [ trigger globbing, which breaks handling of lone backslashes + (case 1). Quoting fixes handling of lone backslashes (case 4), as does + disabling globbing. Quoting with globbing breaks handling of multiple + backslashes (case 5), and disabling globbing breaks handling of escaped + double quotes (case 6). """ result = [] needquote = False + is_msys = _caching_is_msys(executable) for arg in seq: bs_buf = [] @@ -204,6 +252,19 @@ # This logic differs from upstream list2cmdline. needquote = (" " in arg) or ("\t" in arg) or ("\"" in arg) or not arg + + if not needquote and is_msys: + needquote = any(c in arg for c in "?*[\"\'(){}") + + if needquote and is_msys: + result.append('"') + for c in arg: + if c == '\\' or c == '"': + result.append('\\') + result.append(c) + result.append('"') + continue + if needquote: result.append('"') @@ -768,7 +829,7 @@ # On Windows, do our own command line quoting for better compatibility # with some core utility distributions.
if kIsWindows: - args = quote_windows_command(args) + args = quote_windows_command(args, executable) try: procs.append(subprocess.Popen(args, cwd=cmd_shenv.cwd, @@ -1150,14 +1211,6 @@ ]) return substitutions -def _memoize(f): - cache = {} # Intentionally unbounded, see applySubstitutions() - def memoized(x): - if x not in cache: - cache[x] = f(x) - return cache[x] - return memoized - @_memoize def _caching_re_compile(r): return re.compile(r)