diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -2232,7 +2232,7 @@ // by `Environment` due to the same reason above. Diagnostics = llvm::make_unique( IntrusiveRefCntPtr(new DiagnosticIDs), - new DiagnosticOptions); + new DiagnosticOptions, new IgnoringDiagConsumer); SourceMgr = llvm::make_unique(*Diagnostics, *FileMgr); FileID ID = SourceMgr->createFileID(FileMgr->getFile(FileName), SourceLocation(), clang::SrcMgr::C_User); diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1254,9 +1254,8 @@ } Annotator.setCommentLineLevels(AnnotatedLines); - WhitespaceManager Whitespaces( - Env.getSourceManager(), Style, - inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID()))); + WhitespaceManager Whitespaces(Env.getSourceManager(), Style, + inputUsesCRLF(Env.getBufferData())); ContinuationIndenter Indenter(Style, Tokens.getKeywords(), Env.getSourceManager(), Whitespaces, Encoding, BinPackInconclusiveFunctions); @@ -2284,9 +2283,10 @@ return Formatter(Env, Expanded, Status).process(); }); - auto Env = - llvm::make_unique(Code, FileName, Ranges, FirstStartColumn, - NextStartColumn, LastStartColumn); + auto Env = Environment::Create(Code, FileName, Ranges, FirstStartColumn, + NextStartColumn, LastStartColumn); + if (!Env) + return {{}, 0}; llvm::Optional CurrentCode = None; tooling::Replacements Fixes; unsigned Penalty = 0; @@ -2299,10 +2299,12 @@ Penalty += PassFixes.second; if (I + 1 < E) { CurrentCode = std::move(*NewCode); - Env = llvm::make_unique( + Env = Environment::Create( *CurrentCode, FileName, tooling::calculateRangesAfterReplacements(Fixes, Ranges), FirstStartColumn, NextStartColumn, LastStartColumn); + if (!Env) + break; } } } @@ -2327,9 +2329,9 @@ StringRef FileName) { // cleanups only apply to C++ (they mostly concern ctor commas etc.) if (Style.Language != FormatStyle::LK_Cpp) - return tooling::Replacements(); - return Cleaner(Environment(Code, FileName, Ranges), Style).process().first; -} + return {}; + if (auto Env = Environment::Create(Code, FileName, Ranges)) + return Cleaner(*Env, Style).process().first; return {}; } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, @@ -2345,18 +2347,18 @@ StringRef Code, ArrayRef Ranges, StringRef FileName) { - return NamespaceEndCommentsFixer(Environment(Code, FileName, Ranges), Style) - .process() - .first; + if (auto Env = Environment::Create(Code, FileName, Ranges)) + return NamespaceEndCommentsFixer(*Env, Style).process().first; + return {}; } tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, StringRef FileName) { - return UsingDeclarationsSorter(Environment(Code, FileName, Ranges), Style) - .process() - .first; + if (auto Env = Environment::Create(Code, FileName, Ranges)) + return UsingDeclarationsSorter(*Env, Style).process().first; + return {}; } LangOptions getFormattingLangOpts(const FormatStyle &Style) { @@ -2417,11 +2419,13 @@ // of the code to see if it contains Objective-C. if (Extension.empty() || Extension == ".h") { auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName; - Environment Env(Code, NonEmptyFileName, /*Ranges=*/{}); - ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle()); - Guesser.process(); - if (Guesser.isObjC()) - return FormatStyle::LK_ObjC; + if (auto Env = + Environment::Create(Code, NonEmptyFileName, /*Ranges=*/{})) { + ObjCHeaderStyleGuesser Guesser(*Env, getLLVMStyle()); + Guesser.process(); + if (Guesser.isObjC()) + return FormatStyle::LK_ObjC; + } } } return GuessedLanguage; diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -36,7 +36,8 @@ class FormatTokenLexer { public: - FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, + FormatTokenLexer(const SourceManager &SourceMgr, SourceLocation StartLoc, + SourceLocation EndLoc, StringRef BufferData, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding); ArrayRef lex(); @@ -95,7 +96,9 @@ unsigned TrailingWhitespace; std::unique_ptr Lex; const SourceManager &SourceMgr; - FileID ID; + SourceLocation StartLoc; + SourceLocation EndLoc; + StringRef BufferData; const FormatStyle &Style; IdentifierTable IdentTable; AdditionalKeywords Keywords; diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -22,18 +22,19 @@ namespace clang { namespace format { -FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, +FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, + SourceLocation StartLoc, + SourceLocation EndLoc, StringRef BufferData, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), - Style(Style), IdentTable(getFormattingLangOpts(Style)), - Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), - FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), + Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), + StartLoc(StartLoc), EndLoc(EndLoc), BufferData(BufferData), Style(Style), + IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), + Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), + MacroBlockBeginRegex(Style.MacroBlockBegin), MacroBlockEndRegex(Style.MacroBlockEnd) { - Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, - getFormattingLangOpts(Style))); - Lex->SetKeepWhitespaceMode(true); + resetLexer(0); for (const std::string &ForEachMacro : Style.ForEachMacros) Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro}); @@ -458,7 +459,7 @@ SourceLocation loc = Offset < Lex->getBuffer().end() ? Lex->getSourceLocation(Offset + 1) - : SourceMgr.getLocForEndOfFile(ID); + : EndLoc; resetLexer(SourceMgr.getFileOffset(loc)); } @@ -479,7 +480,7 @@ HashToken->TokenText = Lex->getBuffer().substr(From, Len); SourceLocation Loc = To < Lex->getBuffer().size() ? Lex->getSourceLocation(CommentBegin + Len) - : SourceMgr.getLocForEndOfFile(ID); + : EndLoc; resetLexer(SourceMgr.getFileOffset(Loc)); } @@ -842,10 +843,9 @@ } void FormatTokenLexer::resetLexer(unsigned Offset) { - StringRef Buffer = SourceMgr.getBufferData(ID); - Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), - getFormattingLangOpts(Style), Buffer.begin(), - Buffer.begin() + Offset, Buffer.end())); + Lex = llvm::make_unique(StartLoc, getFormattingLangOpts(Style), + BufferData.begin(), + BufferData.begin() + Offset, BufferData.end()); Lex->SetKeepWhitespaceMode(true); TrailingWhitespace = 0; } diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -119,8 +119,7 @@ class JavaScriptImportSorter : public TokenAnalyzer { public: JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style) - : TokenAnalyzer(Env, Style), - FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} + : TokenAnalyzer(Env, Style), FileContents(Env.getBufferData()) {} std::pair analyze(TokenAnnotator &Annotator, @@ -443,10 +442,11 @@ StringRef Code, ArrayRef Ranges, StringRef FileName) { - // FIXME: Cursor support. - return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style) - .process() - .first; + if (auto Env = Environment::Create(Code, FileName, Ranges)) { + // FIXME: Cursor support. + return JavaScriptImportSorter(*Env, Style).process().first; + } + return {}; } } // end namespace format diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h --- a/clang/lib/Format/TokenAnalyzer.h +++ b/clang/lib/Format/TokenAnalyzer.h @@ -35,19 +35,18 @@ class Environment { public: - // This sets up an virtual file system with file \p FileName containing the - // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn, - // that the next lines of \p Code should start at \p NextStartColumn, and - // that \p Code should end at \p LastStartColumn if it ends in newline. - // See also the documentation of clang::format::internal::reformat. - Environment(StringRef Code, StringRef FileName, - ArrayRef Ranges, unsigned FirstStartColumn = 0, - unsigned NextStartColumn = 0, unsigned LastStartColumn = 0); - - FileID getFileID() const { return ID; } + static std::unique_ptr Create(StringRef Code, StringRef FileName, + ArrayRef Ranges, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); const SourceManager &getSourceManager() const { return SM; } + StringRef getBufferData() const { return BufferData; } + SourceLocation getStartLoc() const { return SM.getLocForStartOfFile(ID); } + SourceLocation getEndLoc() const { return SM.getLocForEndOfFile(ID); } + ArrayRef getCharRanges() const { return CharRanges; } // Returns the column at which the fragment of code managed by this @@ -63,6 +62,16 @@ unsigned getLastStartColumn() const { return LastStartColumn; } private: + // This sets up an virtual file system with file \p FileName containing the + // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn, + // that the next lines of \p Code should start at \p NextStartColumn, and + // that \p Code should end at \p LastStartColumn if it ends in newline. + // See also the documentation of clang::format::internal::reformat. + Environment(StringRef Code, StringRef FileName, + ArrayRef Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn, + bool *InvalidBuffer); + // This is only set if constructed from string. std::unique_ptr VirtualSM; @@ -75,6 +84,7 @@ unsigned FirstStartColumn; unsigned NextStartColumn; unsigned LastStartColumn; + StringRef BufferData; }; class TokenAnalyzer : public UnwrappedLineConsumer { diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -33,13 +33,29 @@ namespace clang { namespace format { +std::unique_ptr +Environment::Create(StringRef Code, StringRef FileName, + ArrayRef Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn) { + bool InvalidBuffer = true; + std::unique_ptr Env( + new Environment(Code, FileName, Ranges, FirstStartColumn, NextStartColumn, + LastStartColumn, &InvalidBuffer)); + if (InvalidBuffer) + return nullptr; + return Env; +} + Environment::Environment(StringRef Code, StringRef FileName, ArrayRef Ranges, unsigned FirstStartColumn, unsigned NextStartColumn, - unsigned LastStartColumn) + unsigned LastStartColumn, bool *InvalidBuffer) : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()), ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn), NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) { + BufferData = SM.getBufferData(ID, InvalidBuffer); + if (*InvalidBuffer) + return; SourceLocation StartOfFile = SM.getLocForStartOfFile(ID); for (const tooling::Range &Range : Ranges) { SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); @@ -52,8 +68,7 @@ : Style(Style), Env(Env), AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()), UnwrappedLines(1), - Encoding(encoding::detectEncoding( - Env.getSourceManager().getBufferData(Env.getFileID()))) { + Encoding(encoding::detectEncoding(Env.getBufferData())) { LLVM_DEBUG( llvm::dbgs() << "File encoding: " << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown") @@ -64,7 +79,8 @@ std::pair TokenAnalyzer::process() { tooling::Replacements Result; - FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), + FormatTokenLexer Tokens(Env.getSourceManager(), Env.getStartLoc(), + Env.getEndLoc(), Env.getBufferData(), Env.getFirstStartColumn(), Style, Encoding); UnwrappedLineParser Parser(Style, Tokens.getKeywords(), diff --git a/clang/test/Format/invalid-bom.cpp b/clang/test/Format/invalid-bom.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Format/invalid-bom.cpp @@ -0,0 +1,3 @@ +// REQUIRES: shell +// RUN: echo -en "\xff\xfeinput with invalid BOM" | clang-format - >%t.out +// RUN: grep "input with invalid BOM" %t.out diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -271,6 +271,7 @@ if (SortIncludes.getNumOccurrences() != 0) FormatStyle->SortIncludes = SortIncludes; + unsigned CursorPosition = Cursor; Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges, AssumedFileName, &CursorPosition); @@ -299,13 +300,13 @@ outputReplacementsXML(Replaces); outs() << "\n"; - } else { + } else if (!Replaces.empty()) { IntrusiveRefCntPtr InMemoryFileSystem( new llvm::vfs::InMemoryFileSystem); FileManager Files(FileSystemOptions(), InMemoryFileSystem); DiagnosticsEngine Diagnostics( IntrusiveRefCntPtr(new DiagnosticIDs), - new DiagnosticOptions); + new DiagnosticOptions, new IgnoringDiagConsumer); SourceManager Sources(Diagnostics, Files); FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files, InMemoryFileSystem.get()); @@ -326,6 +327,9 @@ } Rewrite.getEditBuffer(ID).write(outs()); } + } else if (!Inplace) { // Nothing changed, but we still need to dump the input + // as is. + outs() << Code->getBuffer(); } return false; }