Index: llvm/utils/lit/lit/TestRunner.py =================================================================== --- llvm/utils/lit/lit/TestRunner.py +++ llvm/utils/lit/lit/TestRunner.py @@ -1112,11 +1112,16 @@ s = s.replace('&', '\&') return s substitutions.extend([ - ('%{/s:regex_replacement}', regex_escape(sourcepath.replace('\\', '/'))), - ('%{/S:regex_replacement}', regex_escape(sourcedir.replace('\\', '/'))), - ('%{/p:regex_replacement}', regex_escape(sourcedir.replace('\\', '/'))), - ('%{/t:regex_replacement}', regex_escape(tmpBase.replace('\\', '/')) + '.tmp'), - ('%{/T:regex_replacement}', regex_escape(tmpDir.replace('\\', '/'))), + ('%{/s:regex_replacement}', + regex_escape(sourcepath.replace('\\', '/'))), + ('%{/S:regex_replacement}', + regex_escape(sourcedir.replace('\\', '/'))), + ('%{/p:regex_replacement}', + regex_escape(sourcedir.replace('\\', '/'))), + ('%{/t:regex_replacement}', + regex_escape(tmpBase.replace('\\', '/')) + '.tmp'), + ('%{/T:regex_replacement}', + regex_escape(tmpDir.replace('\\', '/'))), ]) # "%:[STpst]" are normalized paths without colons and without a leading @@ -1130,6 +1135,18 @@ ]) return substitutions +def _memoize(f): + cache = {} # Intentionally unbounded, see applySubstitutions() + def memoized(x): + if x not in cache: + cache[x] = f(x) + return cache[x] + return memoized + +@_memoize +def _caching_re_compile(r): + return re.compile(r) + def applySubstitutions(script, substitutions): """Apply substitutions to the script. Allow full regular expression syntax. Replace each matching occurrence of regular expression pattern a with @@ -1139,7 +1156,14 @@ for a,b in substitutions: if kIsWindows: b = b.replace("\\","\\\\") - ln = re.sub(a, b, ln) + # re.compile() has a built-in LRU cache with 512 entries. In some + # test suites lit ends up thrashing that cache, which made e.g. + # check-llvm run 50% slower. Use an explicit, unbounded cache + # to prevent that from happening. Since lit is fairly + # short-lived, since the set of substitutions is fairly small, and + # since thrashing has such bad consequences, not bounding the cache + # seems reasonable. + ln = _caching_re_compile(a).sub(b, ln) # Strip the trailing newline and any extra whitespace. return ln.strip()