llvm · Dec 27, 2015
diff --git a/‎llvm/utils/lit/TODO
+11-1 b/‎llvm/utils/lit/TODO
+11-1
diff --git a/‎llvm/utils/lit/lit/LitConfig.py
+34-2 b/‎llvm/utils/lit/lit/LitConfig.py
+34-2
diff --git a/‎llvm/utils/lit/lit/Test.py
+1 b/‎llvm/utils/lit/lit/Test.py
+1
diff --git a/‎llvm/utils/lit/lit/TestRunner.py
+134-17 b/‎llvm/utils/lit/lit/TestRunner.py
+134-17
diff --git a/‎llvm/utils/lit/lit/formats/googletest.py
+9-2 b/‎llvm/utils/lit/lit/formats/googletest.py
+9-2
@@ -158,7 +158,17 @@ Miscellaneous
 
 * Support valgrind in all configs, and LLVM style valgrind.
 
-* Support a timeout / ulimit.
+* Support ulimit.
 
 * Create an explicit test suite object (instead of using the top-level
   TestingConfig object).
+
+* Introduce a wrapper class that has a ``subprocess.Popen`` like interface
+  but also supports killing the process and all its children and use this for
+  running tests.  This would allow us to implement platform specific methods
+  for killing a process's children which is needed for a per test timeout. On
+  POSIX platforms we can use process groups and on Windows we can probably use
+  job objects. This would not only allow us to remove the dependency on the
+  ``psutil`` module but would also be more reliable as the
+  ``lit.util.killProcessAndChildren()`` function which is currently used is
+  potentially racey (e.g. it might not kill a fork bomb completely).
@@ -8,7 +8,8 @@
 import lit.TestingConfig
 import lit.util
 
-class LitConfig:
+# LitConfig must be a new style class for properties to work
+class LitConfig(object):
     """LitConfig - Configuration data for a 'lit' test runner instance, shared
     across all tests.
 
@@ -21,7 +22,8 @@ class LitConfig:
     def __init__(self, progname, path, quiet,
                  useValgrind, valgrindLeakCheck, valgrindArgs,
                  noExecute, debug, isWindows,
-                 params, config_prefix = None):
+                 params, config_prefix = None,
+                 maxIndividualTestTime = 0):
         # The name of the test runner.
         self.progname = progname
         # The items to add to the PATH environment variable.
@@ -57,6 +59,36 @@ def __init__(self, progname, path, quiet,
                 self.valgrindArgs.append('--leak-check=no')
             self.valgrindArgs.extend(self.valgrindUserArgs)
 
+        self.maxIndividualTestTime = maxIndividualTestTime
+
+    @property
+    def maxIndividualTestTime(self):
+        """
+            Interface for getting maximum time to spend executing
+            a single test
+        """
+        return self._maxIndividualTestTime
+
+    @maxIndividualTestTime.setter
+    def maxIndividualTestTime(self, value):
+        """
+            Interface for setting maximum time to spend executing
+            a single test
+        """
+        self._maxIndividualTestTime = value
+        if self.maxIndividualTestTime > 0:
+            # The current implementation needs psutil to set
+            # a timeout per test. Check it's available.
+            # See lit.util.killProcessAndChildren()
+            try:
+                import psutil
+            except ImportError:
+                self.fatal("Setting a timeout per test requires the"
+                           " Python psutil module but it could not be"
+                           " found. Try installing it via pip or via"
+                           " your operating system's package manager.")
+        elif self.maxIndividualTestTime < 0:
+            self.fatal('The timeout per test must be >= 0 seconds')
 
     def load_config(self, config, path):
         """load_config(config, path) - Load a config object from an alternate
 
@@ -33,6 +33,7 @@ def __repr__(self):
 XPASS       = ResultCode('XPASS', True)
 UNRESOLVED  = ResultCode('UNRESOLVED', True)
 UNSUPPORTED = ResultCode('UNSUPPORTED', False)
+TIMEOUT     = ResultCode('TIMEOUT', True)
 
 # Test metric values.
 
 
@@ -3,6 +3,7 @@
 import re
 import platform
 import tempfile
+import threading
 
 import lit.ShUtil as ShUtil
 import lit.Test as Test
@@ -33,28 +34,127 @@ def __init__(self, cwd, env):
         self.cwd = cwd
         self.env = dict(env)
 
-def executeShCmd(cmd, shenv, results):
+class TimeoutHelper(object):
+    """
+        Object used to helper manage enforcing a timeout in
+        _executeShCmd(). It is passed through recursive calls
+        to collect processes that have been executed so that when
+        the timeout happens they can be killed.
+    """
+    def __init__(self, timeout):
+        self.timeout = timeout
+        self._procs = []
+        self._timeoutReached = False
+        self._doneKillPass = False
+        # This lock will be used to protect concurrent access
+        # to _procs and _doneKillPass
+        self._lock = None
+        self._timer = None
+
+    def cancel(self):
+        if not self.active():
+            return
+        self._timer.cancel()
+
+    def active(self):
+        return self.timeout > 0
+
+    def addProcess(self, proc):
+        if not self.active():
+            return
+        needToRunKill = False
+        with self._lock:
+            self._procs.append(proc)
+            # Avoid re-entering the lock by finding out if kill needs to be run
+            # again here but call it if necessary once we have left the lock.
+            # We could use a reentrant lock here instead but this code seems
+            # clearer to me.
+            needToRunKill = self._doneKillPass
+
+        # The initial call to _kill() from the timer thread already happened so
+        # we need to call it again from this thread, otherwise this process
+        # will be left to run even though the timeout was already hit
+        if needToRunKill:
+            assert self.timeoutReached()
+            self._kill()
+
+    def startTimer(self):
+        if not self.active():
+            return
+
+        # Do some late initialisation that's only needed
+        # if there is a timeout set
+        self._lock = threading.Lock()
+        self._timer = threading.Timer(self.timeout, self._handleTimeoutReached)
+        self._timer.start()
+
+    def _handleTimeoutReached(self):
+        self._timeoutReached = True
+        self._kill()
+
+    def timeoutReached(self):
+        return self._timeoutReached
+
+    def _kill(self):
+        """
+            This method may be called multiple times as we might get unlucky
+            and be in the middle of creating a new process in _executeShCmd()
+            which won't yet be in ``self._procs``. By locking here and in
+            addProcess() we should be able to kill processes launched after
+            the initial call to _kill()
+        """
+        with self._lock:
+            for p in self._procs:
+                lit.util.killProcessAndChildren(p.pid)
+            # Empty the list and note that we've done a pass over the list
+            self._procs = [] # Python2 doesn't have list.clear()
+            self._doneKillPass = True
+
+def executeShCmd(cmd, shenv, results, timeout=0):
+    """
+        Wrapper around _executeShCmd that handles
+        timeout
+    """
+    # Use the helper even when no timeout is required to make
+    # other code simpler (i.e. avoid bunch of ``!= None`` checks)
+    timeoutHelper = TimeoutHelper(timeout)
+    if timeout > 0:
+        timeoutHelper.startTimer()
+    finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
+    timeoutHelper.cancel()
+    timeoutInfo = None
+    if timeoutHelper.timeoutReached():
+        timeoutInfo = 'Reached timeout of {} seconds'.format(timeout)
+
+    return (finalExitCode, timeoutInfo)
+
+def _executeShCmd(cmd, shenv, results, timeoutHelper):
+    if timeoutHelper.timeoutReached():
+        # Prevent further recursion if the timeout has been hit
+        # as we should try avoid launching more processes.
+        return None
+
     if isinstance(cmd, ShUtil.Seq):
         if cmd.op == ';':
-            res = executeShCmd(cmd.lhs, shenv, results)
-            return executeShCmd(cmd.rhs, shenv, results)
+            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
+            return _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
 
         if cmd.op == '&':
             raise InternalShellError(cmd,"unsupported shell operator: '&'")
 
         if cmd.op == '||':
-            res = executeShCmd(cmd.lhs, shenv, results)
+            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
             if res != 0:
-                res = executeShCmd(cmd.rhs, shenv, results)
+                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
             return res
 
         if cmd.op == '&&':
-            res = executeShCmd(cmd.lhs, shenv, results)
+            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
             if res is None:
                 return res
 
             if res == 0:
-                res = executeShCmd(cmd.rhs, shenv, results)
+                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
             return res
 
         raise ValueError('Unknown shell command: %r' % cmd.op)
@@ -206,6 +306,8 @@ def executeShCmd(cmd, shenv, results):
                                           stderr = stderr,
                                           env = cmd_shenv.env,
                                           close_fds = kUseCloseFDs))
+            # Let the helper know about this process
+            timeoutHelper.addProcess(procs[-1])
         except OSError as e:
             raise InternalShellError(j, 'Could not create process ({}) due to {}'.format(executable, e))
 
@@ -271,7 +373,7 @@ def to_string(bytes):
         except:
             err = str(err)
 
-        results.append((cmd.commands[i], out, err, res))
+        results.append((cmd.commands[i], out, err, res, timeoutHelper.timeoutReached()))
         if cmd.pipe_err:
             # Python treats the exit code as a signed char.
             if exitCode is None:
@@ -309,22 +411,25 @@ def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
         cmd = ShUtil.Seq(cmd, '&&', c)
 
     results = []
+    timeoutInfo = None
     try:
         shenv = ShellEnvironment(cwd, test.config.environment)
-        exitCode = executeShCmd(cmd, shenv, results)
+        exitCode, timeoutInfo = executeShCmd(cmd, shenv, results, timeout=litConfig.maxIndividualTestTime)
     except InternalShellError:
         e = sys.exc_info()[1]
         exitCode = 127
-        results.append((e.command, '', e.message, exitCode))
+        results.append((e.command, '', e.message, exitCode, False))
 
     out = err = ''
-    for i,(cmd, cmd_out,cmd_err,res) in enumerate(results):
+    for i,(cmd, cmd_out, cmd_err, res, timeoutReached) in enumerate(results):
         out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
         out += 'Command %d Result: %r\n' % (i, res)
+        if litConfig.maxIndividualTestTime > 0:
+            out += 'Command %d Reached Timeout: %s\n\n' % (i, str(timeoutReached))
         out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
         out += 'Command %d Stderr:\n%s\n\n' % (i, cmd_err)
 
-    return out, err, exitCode
+    return out, err, exitCode, timeoutInfo
 
 def executeScript(test, litConfig, tmpBase, commands, cwd):
     bashPath = litConfig.getBashPath();
@@ -359,8 +464,13 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
             # run on clang with no real loss.
             command = litConfig.valgrindArgs + command
 
-    return lit.util.executeCommand(command, cwd=cwd,
-                                   env=test.config.environment)
+    try:
+        out, err, exitCode = lit.util.executeCommand(command, cwd=cwd,
+                                       env=test.config.environment,
+                                       timeout=litConfig.maxIndividualTestTime)
+        return (out, err, exitCode, None)
+    except lit.util.ExecuteCommandTimeoutException as e:
+        return (e.out, e.err, e.exitCode, e.msg)
 
 def parseIntegratedTestScriptCommands(source_path, keywords):
     """
@@ -573,16 +683,23 @@ def _runShTest(test, litConfig, useExternalSh, script, tmpBase):
     if isinstance(res, lit.Test.Result):
         return res
 
-    out,err,exitCode = res
+    out,err,exitCode,timeoutInfo = res
     if exitCode == 0:
         status = Test.PASS
     else:
-        status = Test.FAIL
+        if timeoutInfo == None:
+            status = Test.FAIL
+        else:
+            status = Test.TIMEOUT
 
     # Form the output log.
-    output = """Script:\n--\n%s\n--\nExit Code: %d\n\n""" % (
+    output = """Script:\n--\n%s\n--\nExit Code: %d\n""" % (
         '\n'.join(script), exitCode)
 
+    if timeoutInfo != None:
+        output += """Timeout: %s\n""" % (timeoutInfo,)
+    output += "\n"
+
     # Append the outputs, if present.
     if out:
         output += """Command Output (stdout):\n--\n%s\n--\n""" % (out,)
 
@@ -109,8 +109,15 @@ def execute(self, test, litConfig):
         if litConfig.noExecute:
             return lit.Test.PASS, ''
 
-        out, err, exitCode = lit.util.executeCommand(
-            cmd, env=test.config.environment)
+        try:
+            out, err, exitCode = lit.util.executeCommand(
+                cmd, env=test.config.environment,
+                timeout=litConfig.maxIndividualTestTime)
+        except lit.util.ExecuteCommandTimeoutException:
+            return (lit.Test.TIMEOUT,
+                    'Reached timeout of {} seconds'.format(
+                        litConfig.maxIndividualTestTime)
+                   )
 
         if exitCode:
             return lit.Test.FAIL, out + err