Index: llvm/utils/lit/lit/builtin_commands/diff.py =================================================================== --- llvm/utils/lit/lit/builtin_commands/diff.py +++ llvm/utils/lit/lit/builtin_commands/diff.py @@ -1,6 +1,7 @@ import difflib import functools import getopt +import locale import os import sys @@ -24,37 +25,27 @@ return path, sorted(child_trees) def compareTwoFiles(flags, filepaths): - compare_bytes = False - encoding = None filelines = [] for file in filepaths: - try: - with open(file, 'r') as f: - filelines.append(f.readlines()) - except UnicodeDecodeError: - try: - with io.open(file, 'r', encoding="utf-8") as f: - filelines.append(f.readlines()) - encoding = "utf-8" - except: - compare_bytes = True - - if compare_bytes: - return compareTwoBinaryFiles(flags, filepaths) - else: - return compareTwoTextFiles(flags, filepaths, encoding) + with open(file, 'rb') as file_bin: + filelines.append(file_bin.readlines()) -def compareTwoBinaryFiles(flags, filepaths): - filelines = [] - for file in filepaths: - with open(file, 'rb') as f: - filelines.append(f.readlines()) + try: + return compareTwoTextFiles(flags, filepaths, filelines, + locale.getpreferredencoding(False)) + except UnicodeDecodeError: + try: + return compareTwoTextFiles(flags, filepaths, filelines, "utf-8") + except: + return compareTwoBinaryFiles(flags, filepaths, filelines) +def compareTwoBinaryFiles(flags, filepaths, filelines): + #sys.stderr.write("Trying as binary....\n") exitCode = 0 if hasattr(difflib, 'diff_bytes'): # python 3.5 or newer diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode()) - diffs = [diff.decode() for diff in diffs] + diffs = [diff.decode(errors="replace") for diff in diffs] else: # python 2.7 if flags.unified_diff: @@ -68,15 +59,15 @@ exitCode = 1 return exitCode -def compareTwoTextFiles(flags, filepaths, encoding): +def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding): + #sys.stderr.write("Trying with encoding {}....\n".format(encoding)) filelines = [] - for file in filepaths: - if encoding is None: - with open(file, 'r') as f: - filelines.append(f.readlines()) - else: - with io.open(file, 'r', encoding=encoding) as f: - filelines.append(f.readlines()) + for lines_bin in filelines_bin: + lines = [] + for line_bin in lines_bin: + line = line_bin.decode(encoding=encoding) + lines.append(line) + filelines.append(lines) exitCode = 0 def compose2(f, g): Index: llvm/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt =================================================================== --- /dev/null +++ llvm/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt @@ -0,0 +1,9 @@ +# Check that diff falls back to binary mode if it cannot decode a file. + +# RUN: diff -u diff-in.bin diff-in.bin +# RUN: diff -u diff-in.utf16 diff-in.bin && false || true +# RUN: diff -u diff-in.utf8 diff-in.bin && false || true +# RUN: diff -u diff-in.bin diff-in.utf8 && false || true + +# Fail so lit will print output. +# RUN: false Index: llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 =================================================================== --- /dev/null +++ llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 @@ -0,0 +1,3 @@ +foo +bar +baz Index: llvm/utils/lit/tests/max-failures.py =================================================================== --- llvm/utils/lit/tests/max-failures.py +++ llvm/utils/lit/tests/max-failures.py @@ -8,7 +8,7 @@ # # END. -# CHECK: Failing Tests (27) +# CHECK: Failing Tests (28) # CHECK: Failing Tests (1) # CHECK: Failing Tests (2) # CHECK: error: Option '--max-failures' requires positive integer Index: llvm/utils/lit/tests/shtest-shell.py =================================================================== --- llvm/utils/lit/tests/shtest-shell.py +++ llvm/utils/lit/tests/shtest-shell.py @@ -34,6 +34,58 @@ # CHECK: error: command failed with exit status: 127 # CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-encodings.txt +# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED *** + +# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin" +# CHECK-NOT: error + +# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: {{^ .f.o.o.$}} +# CHECK-NEXT: {{^-.b.a.r.$}} +# CHECK-NEXT: {{^\+.b.a.r..}} +# CHECK-NEXT: {{^ .b.a.z.$}} +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: -foo +# CHECK-NEXT: -bar +# CHECK-NEXT: -baz +# CHECK-NEXT: {{^\+.f.o.o.$}} +# CHECK-NEXT: {{^\+.b.a.r..}} +# CHECK-NEXT: {{^\+.b.a.z.$}} +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: {{^\-.f.o.o.$}} +# CHECK-NEXT: {{^\-.b.a.r..}} +# CHECK-NEXT: {{^\-.b.a.z.$}} +# CHECK-NEXT: +foo +# CHECK-NEXT: +bar +# CHECK-NEXT: +baz +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "false" + +# CHECK: *** + + # CHECK: FAIL: shtest-shell :: diff-error-1.txt # CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED *** # CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt" @@ -245,4 +297,4 @@ # CHECK: PASS: shtest-shell :: sequencing-0.txt # CHECK: XFAIL: shtest-shell :: sequencing-1.txt # CHECK: PASS: shtest-shell :: valid-shell.txt -# CHECK: Failing Tests (27) +# CHECK: Failing Tests (28)