Index: clang/tools/clang-format/ClangFormat.cpp
===================================================================
--- clang/tools/clang-format/ClangFormat.cpp
+++ clang/tools/clang-format/ClangFormat.cpp
@@ -241,6 +241,30 @@
   }
 }
 
+// Returns the encoding name of an unsupported BOM in BufStr, else nullptr.
+static const char *hasInValidBOM(StringRef BufStr) {
+  // Check to see if the buffer has a UTF Byte Order Mark (BOM).
+  // We only support UTF-8 with and without a BOM right now. See
+  // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
+  // for more information.
+  const char *InvalidBOM =
+      llvm::StringSwitch<const char *>(BufStr)
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+                      "UTF-32 (BE)")
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+                      "UTF-32 (LE)")
+          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+          .StartsWith("\x2B\x2F\x76", "UTF-7")
+          .StartsWith("\xF7\x64\x4C", "UTF-1")
+          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
+          .StartsWith("\x0E\xFE\xFF", "SCSU")
+          .StartsWith("\xFB\xEE\x28", "BOCU-1")
+          .StartsWith("\x84\x31\x95\x33", "GB-18030")
+          .Default(nullptr);
+  return InvalidBOM;
+}
+
 // Returns true on error.
 static bool format(StringRef FileName) {
   if (!OutputXML && Inplace && FileName == "-") {
@@ -260,26 +284,9 @@
   if (Code->getBufferSize() == 0)
     return false; // Empty files are formatted correctly.
 
-  // Check to see if the buffer has a UTF Byte Order Mark (BOM).
-  // We only support UTF-8 with and without a BOM right now. See
-  // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
-  // for more information.
   StringRef BufStr = Code->getBuffer();
-  const char *InvalidBOM =
-      llvm::StringSwitch<const char *>(BufStr)
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                      "UTF-32 (BE)")
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                      "UTF-32 (LE)")
-          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-          .StartsWith("\x2B\x2F\x76", "UTF-7")
-          .StartsWith("\xF7\x64\x4C", "UTF-1")
-          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-          .StartsWith("\x0E\xFE\xFF", "SCSU")
-          .StartsWith("\xFB\xEE\x28", "BOCU-1")
-          .StartsWith("\x84\x31\x95\x33", "GB-18030")
-          .Default(nullptr);
+
+  const char *InvalidBOM = hasInValidBOM(BufStr);
 
   if (InvalidBOM) {
     errs() << "error: encoding with unsupported byte order mark \""
@@ -370,6 +377,38 @@
   OS << clang::getClangToolFullVersion("clang-format") << '\n';
 }
 
+// Dumps the configuration to outs(). Returns 0 on success, 1 on error.
+static unsigned dumpConfig(StringRef AssumeFileName) {
+  StringRef FileName;
+  std::unique_ptr<llvm::MemoryBuffer> Code;
+  if (FileNames.empty()) {
+    // We can't read the code to detect the language if there's no
+    // file name, so leave Code empty here.
+    FileName = AssumeFileName;
+  } else {
+    // Read in the code in case the filename alone isn't enough to
+    // detect the language.
+    ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+        MemoryBuffer::getFileOrSTDIN(FileNames[0]);
+    if (std::error_code EC = CodeOrErr.getError()) {
+      llvm::errs() << EC.message() << "\n";
+      return 1;
+    }
+    FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
+    Code = std::move(CodeOrErr.get());
+  }
+  llvm::Expected<clang::format::FormatStyle> FormatStyle =
+      clang::format::getStyle(Style, FileName, FallbackStyle,
+                              Code ? Code->getBuffer() : "");
+  if (!FormatStyle) {
+    llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
+    return 1;
+  }
+  std::string Config = clang::format::configurationAsText(*FormatStyle);
+  outs() << Config << "\n";
+  return 0;
+}
+
 int main(int argc, const char **argv) {
   llvm::InitLLVM X(argc, argv);
 
@@ -391,34 +430,7 @@
   }
 
   if (DumpConfig) {
-    StringRef FileName;
-    std::unique_ptr<llvm::MemoryBuffer> Code;
-    if (FileNames.empty()) {
-      // We can't read the code to detect the language if there's no
-      // file name, so leave Code empty here.
-      FileName = AssumeFileName;
-    } else {
-      // Read in the code in case the filename alone isn't enough to
-      // detect the language.
-      ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
-          MemoryBuffer::getFileOrSTDIN(FileNames[0]);
-      if (std::error_code EC = CodeOrErr.getError()) {
-        llvm::errs() << EC.message() << "\n";
-        return 1;
-      }
-      FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
-      Code = std::move(CodeOrErr.get());
-    }
-    llvm::Expected<clang::format::FormatStyle> FormatStyle =
-        clang::format::getStyle(Style, FileName, FallbackStyle,
-                                Code ? Code->getBuffer() : "");
-    if (!FormatStyle) {
-      llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
-      return 1;
-    }
-    std::string Config = clang::format::configurationAsText(*FormatStyle);
-    outs() << Config << "\n";
-    return 0;
+    return dumpConfig(AssumeFileName);
   }
 
   bool Error = false;