diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py --- a/clang-tools-extra/clang-tidy/add_new_check.py +++ b/clang-tools-extra/clang-tidy/add_new_check.py @@ -11,16 +11,21 @@ from __future__ import print_function import argparse +import io import os import re import sys +# The documentation files are encoded using UTF-8, however on Windows the +# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is +# always used, use `io.open(filename, mode, encoding='utf8')` for reading and +# writing files # Adapts the module's CMakelist file. Returns 'True' if it could add a new # entry and 'False' if the entry already existed. def adapt_cmake(module_path, check_name_camel): filename = os.path.join(module_path, 'CMakeLists.txt') - with open(filename, 'r') as f: + with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() cpp_file = check_name_camel + '.cpp' @@ -31,7 +36,7 @@ return False print('Updating %s...' % filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: cpp_found = False file_added = False for line in lines: @@ -51,7 +56,7 @@ check_name_dashes = module + '-' + check_name filename = os.path.join(module_path, check_name_camel) + '.h' print('Creating %s...' % filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: header_guard = ('LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_' + module.upper() + '_' + check_name_camel.upper() + '_H') f.write('//===--- ') @@ -104,7 +109,7 @@ def write_implementation(module_path, module, namespace, check_name_camel): filename = os.path.join(module_path, check_name_camel) + '.cpp' print('Creating %s...' 
% filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: f.write('//===--- ') f.write(os.path.basename(filename)) f.write(' - clang-tidy ') @@ -158,11 +163,11 @@ lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))[0] filename = os.path.join(module_path, modulecpp) - with open(filename, 'r') as f: + with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() print('Updating %s...' % filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: header_added = False header_found = False check_added = False @@ -217,7 +222,7 @@ check_name_dashes = module + '-' + check_name filename = os.path.normpath(os.path.join(module_path, '../../docs/ReleaseNotes.rst')) - with open(filename, 'r') as f: + with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() lineMatcher = re.compile('New checks') @@ -225,7 +230,7 @@ checkMatcher = re.compile('- New :doc:`(.*)') print('Updating %s...' % filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: note_added = False header_found = False add_note_here = False @@ -271,7 +276,7 @@ filename = os.path.normpath(os.path.join(module_path, '../../test/clang-tidy/checkers', check_name_dashes + '.' + test_extension)) print('Creating %s...' % filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: f.write("""// RUN: %%check_clang_tidy %%s %(check_name_dashes)s %%t // FIXME: Add something that triggers the check here. 
@@ -307,7 +312,7 @@ docs_dir = os.path.join(clang_tidy_path, '../docs/clang-tidy/checks') filename = os.path.normpath(os.path.join(docs_dir, 'list.rst')) # Read the content of the current list.rst file - with open(filename, 'r') as f: + with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() # Get all existing docs doc_files = list(filter(lambda s: s.endswith('.rst') and s != 'list.rst', @@ -323,7 +328,7 @@ if not os.path.isfile(checkerCode): return "" - with open(checkerCode) as f: + with io.open(checkerCode, encoding='utf8') as f: code = f.read() if 'FixItHint' in code or "ReplacementText" in code or "fixit" in code: # Some simple heuristics to figure out if a checker has an autofix or not. @@ -333,7 +338,7 @@ def process_doc(doc_file): check_name = doc_file.replace('.rst', '') - with open(os.path.join(docs_dir, doc_file), 'r') as doc: + with io.open(os.path.join(docs_dir, doc_file), 'r', encoding='utf8') as doc: content = doc.read() match = re.search('.*:orphan:.*', content) @@ -376,7 +381,7 @@ checks_alias = map(format_link_alias, doc_files) print('Updating %s...' % filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: for line in lines: f.write(line) if line.strip() == ".. csv-table::": @@ -397,7 +402,7 @@ filename = os.path.normpath(os.path.join( module_path, '../../docs/clang-tidy/checks/', check_name_dashes + '.rst')) print('Creating %s...' % filename) - with open(filename, 'w') as f: + with io.open(filename, 'w', encoding='utf8') as f: f.write(""".. 
title:: clang-tidy - %(check_name_dashes)s %(check_name_dashes)s diff --git a/clang-tools-extra/clang-tidy/rename_check.py b/clang-tools-extra/clang-tidy/rename_check.py --- a/clang-tools-extra/clang-tidy/rename_check.py +++ b/clang-tools-extra/clang-tidy/rename_check.py @@ -10,20 +10,25 @@ import argparse import glob +import io import os import re +# The documentation files are encoded using UTF-8, however on Windows the +# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is +# always used, use `io.open(filename, mode, encoding='utf8')` for reading and +# writing files. def replaceInFileRegex(fileName, sFrom, sTo): if sFrom == sTo: return txt = None - with open(fileName, "r") as f: + with io.open(fileName, 'r', encoding='utf8') as f: txt = f.read() txt = re.sub(sFrom, sTo, txt) print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName)) - with open(fileName, "w") as f: + with io.open(fileName, 'w', encoding='utf8') as f: f.write(txt) @@ -31,7 +36,7 @@ if sFrom == sTo: return txt = None - with open(fileName, "r") as f: + with io.open(fileName, 'r', encoding='utf8') as f: txt = f.read() if sFrom not in txt: @@ -39,7 +44,7 @@ txt = txt.replace(sFrom, sTo) print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName)) - with open(fileName, "w") as f: + with io.open(fileName, 'w', encoding='utf8') as f: f.write(txt) @@ -70,7 +75,7 @@ def deleteMatchingLines(fileName, pattern): lines = None - with open(fileName, "r") as f: + with io.open(fileName, 'r', encoding='utf8') as f: lines = f.readlines() not_matching_lines = [l for l in lines if not re.search(pattern, l)] @@ -79,7 +84,7 @@ print("Removing lines matching '%s' in '%s'..." % (pattern, fileName)) print(' ' + ' '.join([l for l in lines if re.search(pattern, l)])) - with open(fileName, "w") as f: + with io.open(fileName, 'w', encoding='utf8') as f: f.writelines(not_matching_lines) return True @@ -101,7 +106,7 @@ # entry and 'False' if the entry already existed. 
def adapt_cmake(module_path, check_name_camel): filename = os.path.join(module_path, 'CMakeLists.txt') - with open(filename, 'r') as f: + with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() cpp_file = check_name_camel + '.cpp' @@ -112,7 +117,7 @@ return False print('Updating %s...' % filename) - with open(filename, 'wb') as f: + with io.open(filename, 'w', encoding='utf8') as f: cpp_found = False file_added = False for line in lines: @@ -130,11 +135,11 @@ def adapt_module(module_path, module, check_name, check_name_camel): modulecpp = next(filter(lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path))) filename = os.path.join(module_path, modulecpp) - with open(filename, 'r') as f: + with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() print('Updating %s...' % filename) - with open(filename, 'wb') as f: + with io.open(filename, 'w', encoding='utf8') as f: header_added = False header_found = False check_added = False @@ -169,7 +174,7 @@ def add_release_notes(clang_tidy_path, old_check_name, new_check_name): filename = os.path.normpath(os.path.join(clang_tidy_path, '../docs/ReleaseNotes.rst')) - with open(filename, 'r') as f: + with io.open(filename, 'r', encoding='utf8') as f: lines = f.readlines() lineMatcher = re.compile('Renamed checks') @@ -177,7 +182,7 @@ checkMatcher = re.compile('- The \'(.*)') print('Updating %s...' % filename) - with open(filename, 'wb') as f: + with io.open(filename, 'w', encoding='utf8') as f: note_added = False header_found = False add_note_here = False