diff --git a/llvm/utils/lit/lit/reports.py b/llvm/utils/lit/lit/reports.py --- a/llvm/utils/lit/lit/reports.py +++ b/llvm/utils/lit/lit/reports.py @@ -68,6 +68,21 @@ file.write('\n') +_xml_escape_dict = {c: None for c in range(32) if chr(c) not in ('\t', '\n', '\r')} + + +def remove_invalid_xml_chars(s): + # According to the XML 1.0 spec, control characters other than + # \t,\r, and \n are not permitted anywhere in the document + # (https://www.w3.org/TR/xml/#charsets) and therefore this function + # removes them to produce a valid XML document. + # + # Note: In XML 1.1 only \0 is illegal (https://www.w3.org/TR/xml11/#charsets) + # but lit currently produces XML 1.0 output. + s = s.translate(_xml_escape_dict) + return s + + class XunitReport(object): def __init__(self, output_file): self.output_file = output_file @@ -113,7 +128,15 @@ # terminator we wrap it by creating a new CDATA block. output = test.result.output.replace(']]>', ']]]]>') if isinstance(output, bytes): - output.decode("utf-8", 'ignore') + output = output.decode("utf-8", 'ignore') + + # Failing test output sometimes contains control characters like + # \x1b (e.g. if there was some -fcolor-diagnostics output) which are + # not allowed inside XML files. + # This causes problems with CI systems: for example, the Jenkins + # JUnit XML will throw an exception when encountering those + # characters and similar problems also occur with GitLab CI. + output = remove_invalid_xml_chars(output) file.write(output) file.write(']]>\n\n') elif test.result.code in self.skipped_codes: diff --git a/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_control_chars.txt b/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_control_chars.txt new file mode 100644 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_control_chars.txt @@ -0,0 +1,5 @@ +# Run a command that fails and prints control characters on stdout.
+# This test checks that the xunit output correctly escapes them in the XML. +# +# RUN: %{python} %S/write-control-chars.py + diff --git a/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/write-control-chars.py b/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/write-control-chars.py new file mode 100644 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/write-control-chars.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +from __future__ import print_function +import sys + +print("a line with \x1b[2;30;41mcontrol characters\x1b[0m.") +sys.exit(1) diff --git a/llvm/utils/lit/tests/shtest-format.py b/llvm/utils/lit/tests/shtest-format.py --- a/llvm/utils/lit/tests/shtest-format.py +++ b/llvm/utils/lit/tests/shtest-format.py @@ -27,6 +27,13 @@ # CHECK-NEXT: a line with bad encoding: # CHECK: -- +# CHECK: FAIL: shtest-format :: external_shell/fail_with_control_chars.txt +# CHECK-NEXT: *** TEST 'shtest-format :: external_shell/fail_with_control_chars.txt' FAILED *** +# CHECK: Command Output (stdout): +# CHECK-NEXT: -- +# CHECK-NEXT: a line with {{.*}}control characters{{.*}}. +# CHECK: -- + # CHECK: PASS: shtest-format :: external_shell/pass.txt # CHECK: FAIL: shtest-format :: fail.txt @@ -68,9 +75,10 @@ # CHECK-NEXT: true # CHECK-NEXT: -- -# CHECK: Failed Tests (3) +# CHECK: Failed Tests (4) # CHECK: shtest-format :: external_shell/fail.txt # CHECK: shtest-format :: external_shell/fail_with_bad_encoding.txt +# CHECK: shtest-format :: external_shell/fail_with_control_chars.txt # CHECK: shtest-format :: fail.txt # CHECK: Unexpectedly Passed Tests (1) @@ -81,13 +89,13 @@ # CHECK: Passed : 6 # CHECK: Expectedly Failed : 4 # CHECK: Unresolved : 3 -# CHECK: Failed : 3 +# CHECK: Failed : 4 # CHECK: Unexpectedly Passed: 1 # XUNIT: # XUNIT-NEXT: -# XUNIT-NEXT: +# XUNIT-NEXT: # XUNIT: # XUNIT-NEXT: @@ -100,6 +108,14 @@ # XUNIT: # XUNIT-NEXT: +# XUNIT: +# XUNIT-NEXT: +# XUNIT-NEXT: + # XUNIT: # XUNIT: