Skip to content

Commit 7574241

Browse files
committed Dec 27, 2015
[lit] Implement support of per test timeout in lit.
This should work with ShTest (executed externally or internally) and GTest test formats. To set the timeout a new option ``--timeout=`` has been added which specifies the maximum run time of an individual test in seconds. By default this is 0 which causes no timeout to be enforced. The timeout can also be set from a lit configuration file by modifying the ``lit_config.maxIndividualTestTime`` property. To implement a timeout we now require the psutil Python module if a timeout is requested. This dependency is confined to the newly added ``lit.util.killProcessAndChildren()``. A note has been added into the TODO document describing how we can remove the dependency on the ``psutil`` module in the future. It would be nice to remove this immediately but that is a lot more work and Daniel Dunbar believes it is better that we get a working implementation first and then improve it. To avoid breaking the existing behaviour the psutil module will not be imported if no timeout is requested. The included testcases are derived from test cases provided by Jonathan Roelofs which were in a previous attempt to add a per test timeout to lit (http://reviews.llvm.org/D6584). Thanks Jonathan! Reviewers: ddunbar, jroelofs, cmatthews, MatzeB Subscribers: cmatthews, llvm-commits Differential Revision: http://reviews.llvm.org/D14706 llvm-svn: 256471
1 parent 756c289 commit 7574241

File tree

17 files changed

+585
-32
lines changed

17 files changed

+585
-32
lines changed
 

‎llvm/utils/lit/TODO

+11-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,17 @@ Miscellaneous
158158

159159
* Support valgrind in all configs, and LLVM style valgrind.
160160

161-
* Support a timeout / ulimit.
161+
* Support ulimit.
162162

163163
* Create an explicit test suite object (instead of using the top-level
164164
TestingConfig object).
165+
166+
* Introduce a wrapper class that has a ``subprocess.Popen`` like interface
167+
but also supports killing the process and all its children and use this for
168+
running tests. This would allow us to implement platform specific methods
169+
for killing a process's children which is needed for a per test timeout. On
170+
POSIX platforms we can use process groups and on Windows we can probably use
171+
job objects. This would not only allow us to remove the dependency on the
172+
``psutil`` module but would also be more reliable as the
173+
``lit.util.killProcessAndChildren()`` function which is currently used is
174+
potentially racy (e.g. it might not kill a fork bomb completely).

‎llvm/utils/lit/lit/LitConfig.py

+34-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
import lit.TestingConfig
99
import lit.util
1010

11-
class LitConfig:
11+
# LitConfig must be a new style class for properties to work
12+
class LitConfig(object):
1213
"""LitConfig - Configuration data for a 'lit' test runner instance, shared
1314
across all tests.
1415
@@ -21,7 +22,8 @@ class LitConfig:
2122
def __init__(self, progname, path, quiet,
2223
useValgrind, valgrindLeakCheck, valgrindArgs,
2324
noExecute, debug, isWindows,
24-
params, config_prefix = None):
25+
params, config_prefix = None,
26+
maxIndividualTestTime = 0):
2527
# The name of the test runner.
2628
self.progname = progname
2729
# The items to add to the PATH environment variable.
@@ -57,6 +59,36 @@ def __init__(self, progname, path, quiet,
5759
self.valgrindArgs.append('--leak-check=no')
5860
self.valgrindArgs.extend(self.valgrindUserArgs)
5961

62+
self.maxIndividualTestTime = maxIndividualTestTime
63+
64+
@property
65+
def maxIndividualTestTime(self):
66+
"""
67+
Interface for getting maximum time to spend executing
68+
a single test
69+
"""
70+
return self._maxIndividualTestTime
71+
72+
@maxIndividualTestTime.setter
73+
def maxIndividualTestTime(self, value):
74+
"""
75+
Interface for setting maximum time to spend executing
76+
a single test
77+
"""
78+
self._maxIndividualTestTime = value
79+
if self.maxIndividualTestTime > 0:
80+
# The current implementation needs psutil to set
81+
# a timeout per test. Check it's available.
82+
# See lit.util.killProcessAndChildren()
83+
try:
84+
import psutil
85+
except ImportError:
86+
self.fatal("Setting a timeout per test requires the"
87+
" Python psutil module but it could not be"
88+
" found. Try installing it via pip or via"
89+
" your operating system's package manager.")
90+
elif self.maxIndividualTestTime < 0:
91+
self.fatal('The timeout per test must be >= 0 seconds')
6092

6193
def load_config(self, config, path):
6294
"""load_config(config, path) - Load a config object from an alternate

‎llvm/utils/lit/lit/Test.py

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def __repr__(self):
3333
XPASS = ResultCode('XPASS', True)
3434
UNRESOLVED = ResultCode('UNRESOLVED', True)
3535
UNSUPPORTED = ResultCode('UNSUPPORTED', False)
36+
TIMEOUT = ResultCode('TIMEOUT', True)
3637

3738
# Test metric values.
3839

‎llvm/utils/lit/lit/TestRunner.py

+134-17
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import re
44
import platform
55
import tempfile
6+
import threading
67

78
import lit.ShUtil as ShUtil
89
import lit.Test as Test
@@ -33,28 +34,127 @@ def __init__(self, cwd, env):
3334
self.cwd = cwd
3435
self.env = dict(env)
3536

36-
def executeShCmd(cmd, shenv, results):
37+
class TimeoutHelper(object):
38+
"""
39+
Object used to help manage enforcing a timeout in
40+
_executeShCmd(). It is passed through recursive calls
41+
to collect processes that have been executed so that when
42+
the timeout happens they can be killed.
43+
"""
44+
def __init__(self, timeout):
45+
self.timeout = timeout
46+
self._procs = []
47+
self._timeoutReached = False
48+
self._doneKillPass = False
49+
# This lock will be used to protect concurrent access
50+
# to _procs and _doneKillPass
51+
self._lock = None
52+
self._timer = None
53+
54+
def cancel(self):
55+
if not self.active():
56+
return
57+
self._timer.cancel()
58+
59+
def active(self):
60+
return self.timeout > 0
61+
62+
def addProcess(self, proc):
63+
if not self.active():
64+
return
65+
needToRunKill = False
66+
with self._lock:
67+
self._procs.append(proc)
68+
# Avoid re-entering the lock by finding out if kill needs to be run
69+
# again here but call it if necessary once we have left the lock.
70+
# We could use a reentrant lock here instead but this code seems
71+
# clearer to me.
72+
needToRunKill = self._doneKillPass
73+
74+
# The initial call to _kill() from the timer thread already happened so
75+
# we need to call it again from this thread, otherwise this process
76+
# will be left to run even though the timeout was already hit
77+
if needToRunKill:
78+
assert self.timeoutReached()
79+
self._kill()
80+
81+
def startTimer(self):
82+
if not self.active():
83+
return
84+
85+
# Do some late initialisation that's only needed
86+
# if there is a timeout set
87+
self._lock = threading.Lock()
88+
self._timer = threading.Timer(self.timeout, self._handleTimeoutReached)
89+
self._timer.start()
90+
91+
def _handleTimeoutReached(self):
92+
self._timeoutReached = True
93+
self._kill()
94+
95+
def timeoutReached(self):
96+
return self._timeoutReached
97+
98+
def _kill(self):
99+
"""
100+
This method may be called multiple times as we might get unlucky
101+
and be in the middle of creating a new process in _executeShCmd()
102+
which won't yet be in ``self._procs``. By locking here and in
103+
addProcess() we should be able to kill processes launched after
104+
the initial call to _kill()
105+
"""
106+
with self._lock:
107+
for p in self._procs:
108+
lit.util.killProcessAndChildren(p.pid)
109+
# Empty the list and note that we've done a pass over the list
110+
self._procs = [] # Python2 doesn't have list.clear()
111+
self._doneKillPass = True
112+
113+
def executeShCmd(cmd, shenv, results, timeout=0):
114+
"""
115+
Wrapper around _executeShCmd that handles
116+
timeout
117+
"""
118+
# Use the helper even when no timeout is required to make
119+
# other code simpler (i.e. avoid a bunch of ``!= None`` checks)
120+
timeoutHelper = TimeoutHelper(timeout)
121+
if timeout > 0:
122+
timeoutHelper.startTimer()
123+
finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
124+
timeoutHelper.cancel()
125+
timeoutInfo = None
126+
if timeoutHelper.timeoutReached():
127+
timeoutInfo = 'Reached timeout of {} seconds'.format(timeout)
128+
129+
return (finalExitCode, timeoutInfo)
130+
131+
def _executeShCmd(cmd, shenv, results, timeoutHelper):
132+
if timeoutHelper.timeoutReached():
133+
# Prevent further recursion if the timeout has been hit
134+
# as we should try to avoid launching more processes.
135+
return None
136+
37137
if isinstance(cmd, ShUtil.Seq):
38138
if cmd.op == ';':
39-
res = executeShCmd(cmd.lhs, shenv, results)
40-
return executeShCmd(cmd.rhs, shenv, results)
139+
res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
140+
return _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
41141

42142
if cmd.op == '&':
43143
raise InternalShellError(cmd,"unsupported shell operator: '&'")
44144

45145
if cmd.op == '||':
46-
res = executeShCmd(cmd.lhs, shenv, results)
146+
res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
47147
if res != 0:
48-
res = executeShCmd(cmd.rhs, shenv, results)
148+
res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
49149
return res
50150

51151
if cmd.op == '&&':
52-
res = executeShCmd(cmd.lhs, shenv, results)
152+
res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
53153
if res is None:
54154
return res
55155

56156
if res == 0:
57-
res = executeShCmd(cmd.rhs, shenv, results)
157+
res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
58158
return res
59159

60160
raise ValueError('Unknown shell command: %r' % cmd.op)
@@ -206,6 +306,8 @@ def executeShCmd(cmd, shenv, results):
206306
stderr = stderr,
207307
env = cmd_shenv.env,
208308
close_fds = kUseCloseFDs))
309+
# Let the helper know about this process
310+
timeoutHelper.addProcess(procs[-1])
209311
except OSError as e:
210312
raise InternalShellError(j, 'Could not create process ({}) due to {}'.format(executable, e))
211313

@@ -271,7 +373,7 @@ def to_string(bytes):
271373
except:
272374
err = str(err)
273375

274-
results.append((cmd.commands[i], out, err, res))
376+
results.append((cmd.commands[i], out, err, res, timeoutHelper.timeoutReached()))
275377
if cmd.pipe_err:
276378
# Python treats the exit code as a signed char.
277379
if exitCode is None:
@@ -309,22 +411,25 @@ def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
309411
cmd = ShUtil.Seq(cmd, '&&', c)
310412

311413
results = []
414+
timeoutInfo = None
312415
try:
313416
shenv = ShellEnvironment(cwd, test.config.environment)
314-
exitCode = executeShCmd(cmd, shenv, results)
417+
exitCode, timeoutInfo = executeShCmd(cmd, shenv, results, timeout=litConfig.maxIndividualTestTime)
315418
except InternalShellError:
316419
e = sys.exc_info()[1]
317420
exitCode = 127
318-
results.append((e.command, '', e.message, exitCode))
421+
results.append((e.command, '', e.message, exitCode, False))
319422

320423
out = err = ''
321-
for i,(cmd, cmd_out,cmd_err,res) in enumerate(results):
424+
for i,(cmd, cmd_out, cmd_err, res, timeoutReached) in enumerate(results):
322425
out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
323426
out += 'Command %d Result: %r\n' % (i, res)
427+
if litConfig.maxIndividualTestTime > 0:
428+
out += 'Command %d Reached Timeout: %s\n\n' % (i, str(timeoutReached))
324429
out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
325430
out += 'Command %d Stderr:\n%s\n\n' % (i, cmd_err)
326431

327-
return out, err, exitCode
432+
return out, err, exitCode, timeoutInfo
328433

329434
def executeScript(test, litConfig, tmpBase, commands, cwd):
330435
bashPath = litConfig.getBashPath();
@@ -359,8 +464,13 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
359464
# run on clang with no real loss.
360465
command = litConfig.valgrindArgs + command
361466

362-
return lit.util.executeCommand(command, cwd=cwd,
363-
env=test.config.environment)
467+
try:
468+
out, err, exitCode = lit.util.executeCommand(command, cwd=cwd,
469+
env=test.config.environment,
470+
timeout=litConfig.maxIndividualTestTime)
471+
return (out, err, exitCode, None)
472+
except lit.util.ExecuteCommandTimeoutException as e:
473+
return (e.out, e.err, e.exitCode, e.msg)
364474

365475
def parseIntegratedTestScriptCommands(source_path, keywords):
366476
"""
@@ -573,16 +683,23 @@ def _runShTest(test, litConfig, useExternalSh, script, tmpBase):
573683
if isinstance(res, lit.Test.Result):
574684
return res
575685

576-
out,err,exitCode = res
686+
out,err,exitCode,timeoutInfo = res
577687
if exitCode == 0:
578688
status = Test.PASS
579689
else:
580-
status = Test.FAIL
690+
if timeoutInfo == None:
691+
status = Test.FAIL
692+
else:
693+
status = Test.TIMEOUT
581694

582695
# Form the output log.
583-
output = """Script:\n--\n%s\n--\nExit Code: %d\n\n""" % (
696+
output = """Script:\n--\n%s\n--\nExit Code: %d\n""" % (
584697
'\n'.join(script), exitCode)
585698

699+
if timeoutInfo != None:
700+
output += """Timeout: %s\n""" % (timeoutInfo,)
701+
output += "\n"
702+
586703
# Append the outputs, if present.
587704
if out:
588705
output += """Command Output (stdout):\n--\n%s\n--\n""" % (out,)

‎llvm/utils/lit/lit/formats/googletest.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,15 @@ def execute(self, test, litConfig):
109109
if litConfig.noExecute:
110110
return lit.Test.PASS, ''
111111

112-
out, err, exitCode = lit.util.executeCommand(
113-
cmd, env=test.config.environment)
112+
try:
113+
out, err, exitCode = lit.util.executeCommand(
114+
cmd, env=test.config.environment,
115+
timeout=litConfig.maxIndividualTestTime)
116+
except lit.util.ExecuteCommandTimeoutException:
117+
return (lit.Test.TIMEOUT,
118+
'Reached timeout of {} seconds'.format(
119+
litConfig.maxIndividualTestTime)
120+
)
114121

115122
if exitCode:
116123
return lit.Test.FAIL, out + err

0 commit comments

Comments
 (0)
Please sign in to comment.