Index: include/clang/Format/Format.h =================================================================== --- include/clang/Format/Format.h +++ include/clang/Format/Format.h @@ -766,6 +766,12 @@ const tooling::Replacements &Replaces, const FormatStyle &Style); +/// \brief Returns the replacements corresponding to applying \p Replaces and +/// cleaning up the code after that. +tooling::Replacements +cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, + const FormatStyle &Style); + /// \brief Reformats the given \p Ranges in the file \p ID. /// /// Each range is extended on either end to its next bigger logic unit, i.e. @@ -791,6 +797,22 @@ StringRef FileName = "", bool *IncompleteFormat = nullptr); +/// \brief Clean up any erroneous/redundant code in the given \p Ranges in the +/// file \p ID. +/// +/// Returns the ``Replacements`` that clean up all \p Ranges in the file \p ID. +tooling::Replacements cleanup(const FormatStyle &Style, + SourceManager &SourceMgr, FileID ID, + ArrayRef Ranges); + +/// \brief Clean up any erroneous/redundant code in the given \p Ranges in \p +/// Code. +/// +/// Otherwise identical to the cleanup() function using a file ID. +tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + StringRef FileName = ""); + /// \brief Returns the ``LangOpts`` that the formatter expects you to set. /// /// \param Style determines specific settings for lexing mode. Index: lib/Format/AffectedRangeManager.h =================================================================== --- /dev/null +++ lib/Format/AffectedRangeManager.h @@ -0,0 +1,66 @@ +//===--- AffectedRangeManager.h - Format C++ code ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief AffectedRangeManager class manages affected ranges in the code. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H +#define LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H + +#include "clang/Basic/SourceManager.h" + +namespace clang { +namespace format { + +struct FormatToken; +class AnnotatedLine; + +class AffectedRangeManager { +public: + AffectedRangeManager(SourceManager &SourceMgr, + const SmallVector &Ranges) + : SourceMgr(SourceMgr), Ranges(Ranges) {} + + // Determines which lines are affected by the SourceRanges given as input. + // Returns \c true if at least one line between I and E or one of their + // children is affected. + bool computeAffectedLines(SmallVectorImpl::iterator I, + SmallVectorImpl::iterator E); + + // Returns true if 'Range' intersects with one of the input ranges. + bool affectsCharSourceRange(const CharSourceRange &Range); + +private: + // Returns true if the range from 'First' to 'Last' intersects with one of the + // input ranges. + bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, + bool IncludeLeadingNewlines); + + // Returns true if one of the input ranges intersect the leading empty lines + // before 'Tok'. + bool affectsLeadingEmptyLines(const FormatToken &Tok); + + // Marks all lines between I and E as well as all their children as affected. + void markAllAsAffected(SmallVectorImpl::iterator I, + SmallVectorImpl::iterator E); + + // Determines whether 'Line' is affected by the SourceRanges given as input. + // Returns \c true if line or one if its children is affected. + bool nonPPLineAffected(AnnotatedLine *Line, + const AnnotatedLine *PreviousLine); + SourceManager &SourceMgr; + const SmallVector Ranges; +}; + +} // namespace format +} // namespace clang + +#endif // LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H Index: lib/Format/AffectedRangeManager.cpp =================================================================== --- /dev/null +++ lib/Format/AffectedRangeManager.cpp @@ -0,0 +1,150 @@ +//===--- AffectedRangeManager.cpp - Format C++ code -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements AffectRangeManager class. +/// +//===----------------------------------------------------------------------===// + +#include "AffectedRangeManager.h" + +#include "FormatToken.h" +#include "TokenAnnotator.h" + +namespace clang { +namespace format { + +bool AffectedRangeManager::computeAffectedLines( + SmallVectorImpl::iterator I, + SmallVectorImpl::iterator E) { + bool SomeLineAffected = false; + const AnnotatedLine *PreviousLine = nullptr; + while (I != E) { + AnnotatedLine *Line = *I; + Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); + + // If a line is part of a preprocessor directive, it needs to be formatted + // if any token within the directive is affected. + if (Line->InPPDirective) { + FormatToken *Last = Line->Last; + SmallVectorImpl::iterator PPEnd = I + 1; + while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { + Last = (*PPEnd)->Last; + ++PPEnd; + } + + if (affectsTokenRange(*Line->First, *Last, + /*IncludeLeadingNewlines=*/false)) { + SomeLineAffected = true; + markAllAsAffected(I, PPEnd); + } + I = PPEnd; + continue; + } + + if (nonPPLineAffected(Line, PreviousLine)) + SomeLineAffected = true; + + PreviousLine = Line; + ++I; + } + return SomeLineAffected; +} + +bool AffectedRangeManager::affectsCharSourceRange( + const CharSourceRange &Range) { + for (SmallVectorImpl::const_iterator I = Ranges.begin(), + E = Ranges.end(); + I != E; ++I) { + if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && + !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) + return true; + } + return false; +} + +bool AffectedRangeManager::affectsTokenRange(const FormatToken &First, + const FormatToken &Last, + bool IncludeLeadingNewlines) { + SourceLocation Start = First.WhitespaceRange.getBegin(); + if (!IncludeLeadingNewlines) + Start = Start.getLocWithOffset(First.LastNewlineOffset); + SourceLocation End = Last.getStartOfNonWhitespace(); + End = End.getLocWithOffset(Last.TokenText.size()); + CharSourceRange Range = CharSourceRange::getCharRange(Start, End); + return affectsCharSourceRange(Range); +} + +bool AffectedRangeManager::affectsLeadingEmptyLines(const FormatToken &Tok) { + CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( + Tok.WhitespaceRange.getBegin(), + Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); + return affectsCharSourceRange(EmptyLineRange); +} + +void AffectedRangeManager::markAllAsAffected( + SmallVectorImpl::iterator I, + SmallVectorImpl::iterator E) { + while (I != E) { + (*I)->Affected = true; + markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); + ++I; + } +} + +bool AffectedRangeManager::nonPPLineAffected( + AnnotatedLine *Line, const AnnotatedLine *PreviousLine) { + bool SomeLineAffected = false; + Line->ChildrenAffected = + computeAffectedLines(Line->Children.begin(), Line->Children.end()); + if (Line->ChildrenAffected) + SomeLineAffected = true; + + // Stores whether one of the line's tokens is directly affected. + bool SomeTokenAffected = false; + // Stores whether we need to look at the leading newlines of the next token + // in order to determine whether it was affected. + bool IncludeLeadingNewlines = false; + + // Stores whether the first child line of any of this line's tokens is + // affected. + bool SomeFirstChildAffected = false; + + for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { + // Determine whether 'Tok' was affected. + if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) + SomeTokenAffected = true; + + // Determine whether the first child of 'Tok' was affected. + if (!Tok->Children.empty() && Tok->Children.front()->Affected) + SomeFirstChildAffected = true; + + IncludeLeadingNewlines = Tok->Children.empty(); + } + + // Was this line moved, i.e. has it previously been on the same line as an + // affected line? + bool LineMoved = PreviousLine && PreviousLine->Affected && + Line->First->NewlinesBefore == 0; + + bool IsContinuedComment = + Line->First->is(tok::comment) && Line->First->Next == nullptr && + Line->First->NewlinesBefore < 2 && PreviousLine && + PreviousLine->Affected && PreviousLine->Last->is(tok::comment); + + if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || + IsContinuedComment) { + Line->Affected = true; + SomeLineAffected = true; + } + return SomeLineAffected; +} + +} // namespace format +} // namespace clang Index: lib/Format/CMakeLists.txt =================================================================== --- lib/Format/CMakeLists.txt +++ lib/Format/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangFormat + AffectedRangeManager.cpp BreakableToken.cpp ContinuationIndenter.cpp Format.cpp Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "clang/Format/Format.h" +#include "AffectedRangeManager.h" #include "ContinuationIndenter.h" #include "TokenAnnotator.h" #include "UnwrappedLineFormatter.h" @@ -779,8 +780,8 @@ class FormatTokenLexer { public: - FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, - encoding::Encoding Encoding) + FormatTokenLexer(SourceManager &SourceMgr, FileID ID, + const FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), LessStashed(false), Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style), @@ -1347,7 +1348,8 @@ Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_define) && std::find(ForEachMacros.begin(), ForEachMacros.end(), - FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) { + FormatTok->Tok.getIdentifierInfo()) != + ForEachMacros.end()) { FormatTok->Type = TT_ForEachMacro; } else if (FormatTok->is(tok::identifier)) { if (MacroBlockBeginRegex.match(Text)) { @@ -1369,7 +1371,7 @@ std::unique_ptr Lex; SourceManager &SourceMgr; FileID ID; - FormatStyle &Style; + const FormatStyle &Style; IdentifierTable IdentTable; AdditionalKeywords Keywords; encoding::Encoding Encoding; @@ -1443,14 +1445,14 @@ } } -class Formatter : public UnwrappedLineConsumer { +class CodeProcessor : public UnwrappedLineConsumer { public: - Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, - ArrayRef Ranges) + CodeProcessor(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, + ArrayRef Ranges) : Style(Style), ID(ID), SourceMgr(SourceMgr), - Whitespaces(SourceMgr, Style, - inputUsesCRLF(SourceMgr.getBufferData(ID))), - Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), + AffectedRangeMgr(SourceMgr, SmallVector( + Ranges.begin(), Ranges.end())), + UnwrappedLines(1), Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { DEBUG(llvm::dbgs() << "File encoding: " << (Encoding == encoding::Encoding_UTF8 ? "UTF8" @@ -1460,7 +1462,8 @@ << "\n"); } - tooling::Replacements format(bool *IncompleteFormat) { + template + tooling::Replacements process(T Callable, bool *IncompleteFormat) { tooling::Replacements Result; FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); @@ -1472,11 +1475,14 @@ ++Run) { DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); SmallVector AnnotatedLines; + for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); } + tooling::Replacements RunResult = - format(AnnotatedLines, Tokens, Result, IncompleteFormat); + Callable(AnnotatedLines, Tokens, Result, IncompleteFormat); + DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; for (tooling::Replacements::iterator I = RunResult.begin(), @@ -1489,21 +1495,57 @@ delete AnnotatedLines[i]; } Result.insert(RunResult.begin(), RunResult.end()); - Whitespaces.reset(); } return Result; } - tooling::Replacements format(SmallVectorImpl &AnnotatedLines, - FormatTokenLexer &Tokens, - tooling::Replacements &Result, - bool *IncompleteFormat) { + const FormatStyle &getFormatStyle() { return Style; } + + FileID getFileID() { return ID; } + + SourceManager &getSourceManager() { return SourceMgr; } + + AffectedRangeManager &getAffectedRangeManager() { return AffectedRangeMgr; } + + encoding::Encoding getEncoding() { return Encoding; } + +private: + void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { + assert(!UnwrappedLines.empty()); + UnwrappedLines.back().push_back(TheLine); + } + + void finishRun() override { + UnwrappedLines.push_back(SmallVector()); + } + + const FormatStyle Style; + FileID ID; + SourceManager &SourceMgr; + // AffectedRangeMgr stores ranges to be fixed. + AffectedRangeManager AffectedRangeMgr; + SmallVector, 2> UnwrappedLines; + + encoding::Encoding Encoding; +}; + +class Formatter { +public: + Formatter(CodeProcessor &Processor) + : Processor(Processor), Style(Processor.getFormatStyle()) {} + + tooling::Replacements + runFormat(SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens, tooling::Replacements &Result, + bool *IncompleteFormat) { TokenAnnotator Annotator(Style, Tokens.getKeywords()); - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - Annotator.annotate(*AnnotatedLines[i]); + for (auto *Line : AnnotatedLines) { + Annotator.annotate(*Line); } deriveLocalStyle(AnnotatedLines); - computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); + Processor.getAffectedRangeManager().computeAffectedLines( + AnnotatedLines.begin(), AnnotatedLines.end()); + if (Style.Language == FormatStyle::LK_JavaScript && Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) requoteJSStringLiteral(AnnotatedLines, Result); @@ -1513,9 +1555,14 @@ } Annotator.setCommentLineLevels(AnnotatedLines); - ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, - Whitespaces, Encoding, - BinPackInconclusiveFunctions); + + WhitespaceManager Whitespaces( + Processor.getSourceManager(), Style, + inputUsesCRLF( + Processor.getSourceManager().getBufferData(Processor.getFileID()))); + ContinuationIndenter Indenter( + Style, Tokens.getKeywords(), Processor.getSourceManager(), Whitespaces, + Processor.getEncoding(), BinPackInconclusiveFunctions); UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), IncompleteFormat) .format(AnnotatedLines); @@ -1523,50 +1570,11 @@ } private: - // Determines which lines are affected by the SourceRanges given as input. - // Returns \c true if at least one line between I and E or one of their - // children is affected. - bool computeAffectedLines(SmallVectorImpl::iterator I, - SmallVectorImpl::iterator E) { - bool SomeLineAffected = false; - const AnnotatedLine *PreviousLine = nullptr; - while (I != E) { - AnnotatedLine *Line = *I; - Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); - - // If a line is part of a preprocessor directive, it needs to be formatted - // if any token within the directive is affected. - if (Line->InPPDirective) { - FormatToken *Last = Line->Last; - SmallVectorImpl::iterator PPEnd = I + 1; - while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { - Last = (*PPEnd)->Last; - ++PPEnd; - } - - if (affectsTokenRange(*Line->First, *Last, - /*IncludeLeadingNewlines=*/false)) { - SomeLineAffected = true; - markAllAsAffected(I, PPEnd); - } - I = PPEnd; - continue; - } - - if (nonPPLineAffected(Line, PreviousLine)) - SomeLineAffected = true; - - PreviousLine = Line; - ++I; - } - return SomeLineAffected; - } - // If the last token is a double/single-quoted string literal, generates a // replacement with a single/double quoted string literal, re-escaping the // contents in the process. void requoteJSStringLiteral(SmallVectorImpl &Lines, - tooling::Replacements &Result) { + tooling::Replacements &Result) { for (AnnotatedLine *Line : Lines) { requoteJSStringLiteral(Line->Children, Result); if (!Line->Affected) @@ -1589,8 +1597,8 @@ SourceLocation Start = FormatTok->Tok.getLocation(); auto Replace = [&](SourceLocation Start, unsigned Length, StringRef ReplacementText) { - Result.insert( - tooling::Replacement(SourceMgr, Start, Length, ReplacementText)); + Result.insert(tooling::Replacement(Processor.getSourceManager(), + Start, Length, ReplacementText)); }; Replace(Start, 1, IsSingle ? "'" : "\""); Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1, @@ -1601,145 +1609,50 @@ bool Escaped = false; for (size_t i = 1; i < Input.size() - 1; i++) { switch (Input[i]) { - case '\\': - if (!Escaped && i + 1 < Input.size() && - ((IsSingle && Input[i + 1] == '"') || - (!IsSingle && Input[i + 1] == '\''))) { - // Remove this \, it's escaping a " or ' that no longer needs - // escaping - ColumnWidth--; - Replace(Start.getLocWithOffset(i), 1, ""); - continue; - } - Escaped = !Escaped; - break; - case '\"': - case '\'': - if (!Escaped && IsSingle == (Input[i] == '\'')) { - // Escape the quote. - Replace(Start.getLocWithOffset(i), 0, "\\"); - ColumnWidth++; - } - Escaped = false; - break; - default: - Escaped = false; - break; + case '\\': + if (!Escaped && i + 1 < Input.size() && + ((IsSingle && Input[i + 1] == '"') || + (!IsSingle && Input[i + 1] == '\''))) { + // Remove this \, it's escaping a " or ' that no longer needs + // escaping + ColumnWidth--; + Replace(Start.getLocWithOffset(i), 1, ""); + continue; + } + Escaped = !Escaped; + break; + case '\"': + case '\'': + if (!Escaped && IsSingle == (Input[i] == '\'')) { + // Escape the quote. + Replace(Start.getLocWithOffset(i), 0, "\\"); + ColumnWidth++; + } + Escaped = false; + break; + default: + Escaped = false; + break; } } // For formatting, count the number of non-escaped single quotes in them // and adjust ColumnWidth to take the added escapes into account. - // FIXME(martinprobst): this might conflict with code breaking a long string - // literal (which clang-format doesn't do, yet). For that to work, this code - // would have to modify TokenText directly. + // FIXME(martinprobst): this might conflict with code breaking a long + // string literal (which clang-format doesn't do, yet). For that to + // work, this code would have to modify TokenText directly. FormatTok->ColumnWidth = ColumnWidth; } } } - - // Determines whether 'Line' is affected by the SourceRanges given as input. - // Returns \c true if line or one if its children is affected. - bool nonPPLineAffected(AnnotatedLine *Line, - const AnnotatedLine *PreviousLine) { - bool SomeLineAffected = false; - Line->ChildrenAffected = - computeAffectedLines(Line->Children.begin(), Line->Children.end()); - if (Line->ChildrenAffected) - SomeLineAffected = true; - - // Stores whether one of the line's tokens is directly affected. - bool SomeTokenAffected = false; - // Stores whether we need to look at the leading newlines of the next token - // in order to determine whether it was affected. - bool IncludeLeadingNewlines = false; - - // Stores whether the first child line of any of this line's tokens is - // affected. - bool SomeFirstChildAffected = false; - - for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { - // Determine whether 'Tok' was affected. - if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) - SomeTokenAffected = true; - - // Determine whether the first child of 'Tok' was affected. - if (!Tok->Children.empty() && Tok->Children.front()->Affected) - SomeFirstChildAffected = true; - - IncludeLeadingNewlines = Tok->Children.empty(); - } - - // Was this line moved, i.e. has it previously been on the same line as an - // affected line? - bool LineMoved = PreviousLine && PreviousLine->Affected && - Line->First->NewlinesBefore == 0; - - bool IsContinuedComment = - Line->First->is(tok::comment) && Line->First->Next == nullptr && - Line->First->NewlinesBefore < 2 && PreviousLine && - PreviousLine->Affected && PreviousLine->Last->is(tok::comment); - - if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || - IsContinuedComment) { - Line->Affected = true; - SomeLineAffected = true; - } - return SomeLineAffected; - } - - // Marks all lines between I and E as well as all their children as affected. - void markAllAsAffected(SmallVectorImpl::iterator I, - SmallVectorImpl::iterator E) { - while (I != E) { - (*I)->Affected = true; - markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); - ++I; - } - } - - // Returns true if the range from 'First' to 'Last' intersects with one of the - // input ranges. - bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, - bool IncludeLeadingNewlines) { - SourceLocation Start = First.WhitespaceRange.getBegin(); - if (!IncludeLeadingNewlines) - Start = Start.getLocWithOffset(First.LastNewlineOffset); - SourceLocation End = Last.getStartOfNonWhitespace(); - End = End.getLocWithOffset(Last.TokenText.size()); - CharSourceRange Range = CharSourceRange::getCharRange(Start, End); - return affectsCharSourceRange(Range); - } - - // Returns true if one of the input ranges intersect the leading empty lines - // before 'Tok'. - bool affectsLeadingEmptyLines(const FormatToken &Tok) { - CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( - Tok.WhitespaceRange.getBegin(), - Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); - return affectsCharSourceRange(EmptyLineRange); - } - - // Returns true if 'Range' intersects with one of the input ranges. - bool affectsCharSourceRange(const CharSourceRange &Range) { - for (SmallVectorImpl::const_iterator I = Ranges.begin(), - E = Ranges.end(); - I != E; ++I) { - if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && - !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) - return true; - } - return false; - } - static bool inputUsesCRLF(StringRef Text) { return Text.count('\r') * 2 > Text.count('\n'); } bool hasCpp03IncompatibleFormat(const SmallVectorImpl &Lines) { - for (const AnnotatedLine* Line : Lines) { + for (const AnnotatedLine *Line : Lines) { if (hasCpp03IncompatibleFormat(Line->Children)) return true; for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) { @@ -1757,7 +1670,7 @@ int countVariableAlignments(const SmallVectorImpl &Lines) { int AlignmentDiff = 0; - for (const AnnotatedLine* Line : Lines) { + for (const AnnotatedLine *Line : Lines) { AlignmentDiff += countVariableAlignments(Line->Children); for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) { if (!Tok->is(TT_PointerOrReference)) @@ -1804,24 +1717,177 @@ HasBinPackedFunction || !HasOnePerLineFunction; } - void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { - assert(!UnwrappedLines.empty()); - UnwrappedLines.back().push_back(TheLine); + CodeProcessor &Processor; + FormatStyle Style; + bool BinPackInconclusiveFunctions; +}; + +// This class clean up the erroneous/redundant code around the given ranges in +// file. +class Cleaner { +public: + Cleaner(CodeProcessor &Processor) + : Processor(Processor), Style(Processor.getFormatStyle()), + DeletedTokens(FormatTokenLess(Processor.getSourceManager())) {} + + tooling::Replacements + runCleanup(FormatTokenLexer &Tokens, + SmallVectorImpl &AnnotatedLines) { + TokenAnnotator Annotator(Style, Tokens.getKeywords()); + for (auto *Line : AnnotatedLines) { + Annotator.annotate(*Line); + } + // FIXME: in the current implementation the granularity of affected range + // is an annotated line. However, this is not sufficient. Furthermore, + // redundant code introduced by replacements does not necessarily + // intercept with ranges of replacements that result in the redundancy. + // To determine if some redundant code is actually introduced by + // replacements(e.g. deletions), we need to come up with a more + // sophisticated way of computing affected ranges. + Processor.getAffectedRangeManager().computeAffectedLines( + AnnotatedLines.begin(), AnnotatedLines.end()); + + checkEmptyNamespace(AnnotatedLines); + + return generateFixes(); } - void finishRun() override { - UnwrappedLines.push_back(SmallVector()); +private: + bool containsOnlyComments(const AnnotatedLine &Line) { + for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) { + if (Tok->isNot(tok::comment)) + return false; + } + return true; } - FormatStyle Style; - FileID ID; - SourceManager &SourceMgr; - WhitespaceManager Whitespaces; - SmallVector Ranges; - SmallVector, 2> UnwrappedLines; + // Iterate through all lines and remove any empty (nested) namespaces. + void checkEmptyNamespace(SmallVectorImpl &AnnotatedLines) { + for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { + auto &Line = *AnnotatedLines[i]; + if (Line.startsWith(tok::kw_namespace) || + Line.startsWith(tok::kw_inline, tok::kw_namespace)) { + checkEmptyNamespace(AnnotatedLines, i, i); + } + } - encoding::Encoding Encoding; - bool BinPackInconclusiveFunctions; + for (auto Line : DeletedLines) { + FormatToken *Tok = AnnotatedLines[Line]->First; + while (Tok) { + deleteToken(Tok); + Tok = Tok->Next; + } + } + } + + // The function checks if the namespace, which starts from \p CurrentLine, and + // its nested namespaces are empty and delete them if they are empty. It also + // sets \p NewLine to the last line checked. + // Returns true if the current namespace is empty. + bool checkEmptyNamespace(SmallVectorImpl &AnnotatedLines, + unsigned CurrentLine, unsigned &NewLine) { + unsigned InitLine = CurrentLine, End = AnnotatedLines.size(); + if (Style.BraceWrapping.AfterNamespace) { + // If the left brace is in a new line, we should consume it first so that + // it does not make the namespace non-empty. + // TODO: error handling if there is no left brace. + if (!AnnotatedLines[++CurrentLine]->startsWith(tok::l_brace)) { + NewLine = CurrentLine; + return false; + } + } else if (!AnnotatedLines[CurrentLine]->endsWith(tok::l_brace)) { + return false; + } + while (++CurrentLine < End) { + if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace)) + break; + + if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) || + AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline, + tok::kw_namespace)) { + if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine)) + return false; + CurrentLine = NewLine; + continue; + } + + if (containsOnlyComments(*AnnotatedLines[CurrentLine])) + continue; + + // If there is anything other than comments or nested namespaces in the + // current namespace, the namespace cannot be empty. + NewLine = CurrentLine; + return false; + } + + NewLine = CurrentLine; + if (CurrentLine >= End) + return false; + + // Check if the empty namespace is actually affected by changed ranges. + if (!Processor.getAffectedRangeManager().affectsCharSourceRange( + CharSourceRange::getCharRange( + AnnotatedLines[InitLine]->First->Tok.getLocation(), + AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc()))) + return false; + + for (unsigned i = InitLine; i <= CurrentLine; ++i) { + DeletedLines.insert(i); + } + + return true; + } + + // Delete the given token. + inline void deleteToken(FormatToken *Tok) { + if (Tok) + DeletedTokens.insert(Tok); + } + + tooling::Replacements generateFixes() { + tooling::Replacements Fixes; + std::vector Tokens; + std::copy(DeletedTokens.begin(), DeletedTokens.end(), + std::back_inserter(Tokens)); + + // Merge multiple continuous token deletions into one big deletion so that + // the number of replacements can be reduced. This makes computing affected + // ranges more efficient when we run reformat on the changed code. + unsigned Idx = 0; + while (Idx < Tokens.size()) { + unsigned St = Idx, End = Idx; + while ((End + 1) < Tokens.size() && + Tokens[End]->Next == Tokens[End + 1]) { + End++; + } + auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(), + Tokens[End]->Tok.getEndLoc()); + Fixes.insert(tooling::Replacement(Processor.getSourceManager(), SR, "")); + Idx = End + 1; + } + + return Fixes; + } + + // Class for less-than inequality comparason for the set `RedundantTokens`. + // We store tokens in the order they appear in the translation unit so that + // we do not need to sort them in `generateFixes()`. + struct FormatTokenLess { + FormatTokenLess(SourceManager &SM) : SM(SM) {} + + bool operator()(const FormatToken *LHS, const FormatToken *RHS) { + return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(), + RHS->Tok.getLocation()); + } + SourceManager &SM; + }; + + CodeProcessor &Processor; + FormatStyle Style; + // Tokens to be deleted. + std::set DeletedTokens; + // The line numbers of lines to be deleted. + std::set DeletedLines; }; struct IncludeDirective { @@ -1990,9 +2056,11 @@ return Replaces; } -tooling::Replacements formatReplacements(StringRef Code, - const tooling::Replacements &Replaces, - const FormatStyle &Style) { +template +static tooling::Replacements +processReplacements(T ProcessFunc, StringRef Code, + const tooling::Replacements &Replaces, + const FormatStyle &Style) { if (Replaces.empty()) return tooling::Replacements(); @@ -2000,29 +2068,44 @@ std::vector ChangedRanges = tooling::calculateChangedRanges(Replaces); StringRef FileName = Replaces.begin()->getFilePath(); + tooling::Replacements FormatReplaces = - reformat(Style, NewCode, ChangedRanges, FileName); + ProcessFunc(Style, NewCode, ChangedRanges, FileName); - tooling::Replacements MergedReplacements = - mergeReplacements(Replaces, FormatReplaces); + return mergeReplacements(Replaces, FormatReplaces); +} - return MergedReplacements; +tooling::Replacements formatReplacements(StringRef Code, + const tooling::Replacements &Replaces, + const FormatStyle &Style) { + // We need to use lambda function here since there are two versions of + // `reformat`. + auto Reformat = [](const FormatStyle &Style, StringRef Code, + std::vector Ranges, + StringRef FileName) -> tooling::Replacements { + return reformat(Style, Code, Ranges, FileName); + }; + return processReplacements(Reformat, Code, Replaces, Style); } -tooling::Replacements reformat(const FormatStyle &Style, - SourceManager &SourceMgr, FileID ID, - ArrayRef Ranges, - bool *IncompleteFormat) { - FormatStyle Expanded = expandPresets(Style); - if (Expanded.DisableFormat) - return tooling::Replacements(); - Formatter formatter(Expanded, SourceMgr, ID, Ranges); - return formatter.format(IncompleteFormat); +tooling::Replacements +cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, + const FormatStyle &Style) { + // We need to use lambda function here since there are two versions of + // `cleanup`. + auto Cleanup = [](const FormatStyle &Style, StringRef Code, + std::vector Ranges, + StringRef FileName) -> tooling::Replacements { + return cleanup(Style, Code, Ranges, FileName); + }; + return processReplacements(Cleanup, Code, Replaces, Style); } -tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, - ArrayRef Ranges, - StringRef FileName, bool *IncompleteFormat) { +template +static tooling::Replacements +setupEnvironmentAndProcess(T Callable, const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, StringRef FileName, + bool *IncompleteFormat) { if (Style.DisableFormat) return tooling::Replacements(); @@ -2045,7 +2128,70 @@ SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); + + return Callable(Style, SourceMgr, ID, CharRanges, IncompleteFormat); +} + +tooling::Replacements reformat(const FormatStyle &Style, + SourceManager &SourceMgr, FileID ID, + ArrayRef Ranges, + bool *IncompleteFormat) { + FormatStyle Expanded = expandPresets(Style); + if (Expanded.DisableFormat) + return tooling::Replacements(); + + CodeProcessor Processor(Expanded, SourceMgr, ID, Ranges); + auto Callable = + [&Processor](SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens, tooling::Replacements &Result, + bool *IncompleteFormat) -> tooling::Replacements { + Formatter Format(Processor); + return Format.runFormat(AnnotatedLines, Tokens, Result, IncompleteFormat); + }; + return Processor.process(Callable, IncompleteFormat); +} + +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + StringRef FileName, bool *IncompleteFormat) { + // We need to use lambda here since there are two versions of `reformat`. + auto Format = [](const FormatStyle &Style, SourceManager &SourceMgr, + FileID ID, ArrayRef Ranges, + bool *IncompleteFormat) -> tooling::Replacements { + return reformat(Style, SourceMgr, ID, Ranges, IncompleteFormat); + }; + return setupEnvironmentAndProcess(Format, Style, Code, Ranges, FileName, + IncompleteFormat); +} + +tooling::Replacements cleanup(const FormatStyle &Style, + SourceManager &SourceMgr, FileID ID, + ArrayRef Ranges) { + CodeProcessor Processor(Style, SourceMgr, ID, Ranges); + // FIXME: find a way to eliminate the unused parameters. + auto Callable = + [&Processor](SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens, tooling::Replacements &Result, + bool *IncompleteFormat) -> tooling::Replacements { + Cleaner Cleaner(Processor); + return Cleaner.runCleanup(Tokens, AnnotatedLines); + }; + return Processor.process(Callable, nullptr); +} + +tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + StringRef FileName) { + // We need to use lambda here since there are two versions of `cleanup`. + auto Cleanup = [](const FormatStyle &Style, SourceManager &SourceMgr, + FileID ID, ArrayRef Ranges, + bool *IncompleteFormat) -> tooling::Replacements { + + return cleanup(Style, SourceMgr, ID, Ranges); + }; + + return setupEnvironmentAndProcess(Cleanup, Style, Code, Ranges, FileName, + nullptr); } LangOptions getFormattingLangOpts(const FormatStyle &Style) { @@ -2059,7 +2205,7 @@ LangOpts.Bool = 1; LangOpts.ObjC1 = 1; LangOpts.ObjC2 = 1; - LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. + LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. LangOpts.DeclSpecKeyword = 1; // To get __declspec. return LangOpts; } Index: lib/Format/TokenAnnotator.h =================================================================== --- lib/Format/TokenAnnotator.h +++ lib/Format/TokenAnnotator.h @@ -86,6 +86,14 @@ return startsWith(First, Tokens...); } + /// \c true if this line ends with the given tokens in reversed order, + /// ignoring comments. + /// For example, given tokens [T1, T2, T3, ...], the function returns true if + /// this line is like "... T3 T2 T1". + template bool endsWith(Ts... Tokens) const { + return endsWithInternal(Last, Tokens...); + } + /// \c true if this line looks like a function definition instead of a /// function declaration. Asserts MightBeFunctionDecl. bool mightBeFunctionDefinition() const { @@ -134,6 +142,21 @@ bool startsWith(FormatToken *Tok, A K1, Ts... Tokens) const { return startsWith(Tok, K1) && startsWith(Tok->Next, Tokens...); } + + template + bool endsWithInternal(const FormatToken *Tok, A K1) const { + while (Tok && Tok->is(tok::comment)) + Tok = Tok->Previous; + return Tok && Tok->is(K1); + } + + template + bool endsWithInternal(const FormatToken *Tok, A K1, Ts... Tokens) const { + while (Tok && Tok->is(tok::comment)) + Tok = Tok->Previous; + return Tok && endsWithInternal(Tok, K1) && + endsWithInternal(Tok->Previous, Tokens...); + } }; /// \brief Determines extra information about the tokens comprising an Index: unittests/Format/CMakeLists.txt =================================================================== --- unittests/Format/CMakeLists.txt +++ unittests/Format/CMakeLists.txt @@ -3,6 +3,7 @@ ) add_clang_unittest(FormatTests + CleanupTest.cpp FormatTest.cpp FormatTestJava.cpp FormatTestJS.cpp Index: unittests/Format/CleanupTest.cpp =================================================================== --- /dev/null +++ unittests/Format/CleanupTest.cpp @@ -0,0 +1,118 @@ +//===- unittest/Format/CleanupTest.cpp - Code cleanup unit tests ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Format/Format.h" + +#include "clang/Tooling/Core/Replacement.h" + +#include "gtest/gtest.h" + +namespace clang { +namespace format { +namespace { + +class CleanupTest : public ::testing::Test { +protected: + std::string cleanup(llvm::StringRef Code, + const std::vector &Ranges, + const FormatStyle &Style = getLLVMStyle()) { + tooling::Replacements Replaces = format::cleanup(Style, Code, Ranges); + + std::string Result = applyAllReplacements(Code, Replaces); + EXPECT_NE("", Result); + return Result; + } +}; + +TEST_F(CleanupTest, DeleteEmptyNamespaces) { + std::string Code = "namespace A {\n" + "namespace B {\n" + "} // namespace B\n" + "} // namespace A\n\n" + "namespace C {\n" + "namespace D { int i; }\n" + "inline namespace E { namespace { } }\n" + "}"; + std::string Expected = "\n\n\n\n\nnamespace C {\n" + "namespace D { int i; }\n \n" + "}"; + std::vector Ranges; + Ranges.push_back(tooling::Range(28, 0)); + Ranges.push_back(tooling::Range(91, 6)); + Ranges.push_back(tooling::Range(132, 0)); + std::string Result = cleanup(Code, Ranges); + EXPECT_EQ(Expected, Result); +} + +TEST_F(CleanupTest, NamespaceWithSyntaxError) { + std::string Code = "namespace A {\n" + "namespace B {\n" // missing r_brace + "} // namespace A\n\n" + "namespace C {\n" + "namespace D int i; }\n" + "inline namespace E { namespace { } }\n" + "}"; + std::string Expected = "namespace A {\n" + "\n\n\nnamespace C {\n" + "namespace D int i; }\n \n" + "}"; + std::vector Ranges(1, tooling::Range(0, Code.size())); + std::string Result = cleanup(Code, Ranges); + EXPECT_EQ(Expected, Result); +} + +TEST_F(CleanupTest, EmptyNamespaceNotAffected) { + std::string Code = "namespace A {\n\n" + "namespace {\n\n}}"; + // Even though the namespaces are empty, but the inner most empty namespace + // block is not affected by the changed ranges. + std::string Expected = "namespace A {\n\n" + "namespace {\n\n}}"; + // Set the changed range to be the second "\n". + std::vector Ranges(1, tooling::Range(14, 0)); + std::string Result = cleanup(Code, Ranges); + EXPECT_EQ(Expected, Result); +} + +TEST_F(CleanupTest, EmptyNamespaceWithCommentsNoBreakBeforeBrace) { + std::string Code = "namespace A {\n" + "namespace B {\n" + "// Yo\n" + "} // namespace B\n" + "} // namespace A\n" + "namespace C { // Yo\n" + "}"; + std::string Expected = "\n\n\n\n\n\n"; + std::vector Ranges(1, tooling::Range(0, Code.size())); + std::string Result = cleanup(Code, Ranges); + EXPECT_EQ(Expected, Result); +} + +TEST_F(CleanupTest, EmptyNamespaceWithCommentsBreakBeforeBrace) { + std::string Code = "namespace A\n" + "/* Yo */ {\n" + "namespace B\n" + "{\n" + "// Yo\n" + "} // namespace B\n" + "} // namespace A\n" + "namespace C\n" + "{ // Yo\n" + "}\n"; + std::string Expected = "\n\n\n\n\n\n\n\n\n\n"; + std::vector Ranges(1, tooling::Range(0, Code.size())); + FormatStyle Style = getLLVMStyle(); + Style.BraceWrapping.AfterNamespace = true; + std::string Result = cleanup(Code, Ranges, Style); + EXPECT_EQ(Expected, Result); +} + +} // end namespace +} // end namespace format +} // end namespace clang Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -11293,6 +11293,35 @@ Code, formatReplacements(Code, Replaces, Style))); } +TEST_F(ReplacementTest, FixOnlyAffectedCodeAfterReplacements) { + std::string Code = "namespace A {\n" + "namespace B {\n" + " int x;\n" + "} // namespace B\n" + "} // namespace A\n" + "\n" + "namespace C {\n" + "namespace D { int i; }\n" + "inline namespace E { namespace { int y; } }\n" + "int x= 0;" + "}"; + std::string Expected = "\n\nnamespace C {\n" + "namespace D { int i; }\n\n" + "int x= 0;" + "}"; + FileID ID = Context.createInMemoryFile("fix.cpp", Code); + tooling::Replacements Replaces; + Replaces.insert(tooling::Replacement( + Context.Sources, Context.getLocation(ID, 3, 3), 6, "")); + Replaces.insert(tooling::Replacement( + Context.Sources, Context.getLocation(ID, 9, 34), 6, "")); + + format::FormatStyle Style = format::getLLVMStyle(); + auto FinalReplaces = formatReplacements( + Code, cleanupAroundReplacements(Code, Replaces, Style), Style); + EXPECT_EQ(Expected, applyAllReplacements(Code, FinalReplaces)); +} + } // end namespace } // end namespace format } // end namespace clang