Index: lib/Format/BreakableToken.cpp
===================================================================
--- lib/Format/BreakableToken.cpp
+++ lib/Format/BreakableToken.cpp
@@ -204,8 +204,8 @@
 BreakableToken::Split
 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                unsigned ColumnLimit) const {
-  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
-                         ColumnLimit, Style.TabWidth, Encoding);
+  // Splitting line comments is handled by \c CommentReflower.
+  return Split(StringRef::npos, 0);
 }
 
 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
@@ -363,9 +363,14 @@
 BreakableToken::Split
 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
-  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
-                         getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Style.TabWidth, Encoding);
+  if (InPPDirective) {
+    return getCommentSplit(Lines[LineIndex].substr(TailOffset),
+                           getContentStartColumn(LineIndex, TailOffset),
+                           ColumnLimit, Style.TabWidth, Encoding);
+  }
+  // Splitting block comments outside preprocessor directives is handled by \c
+  // CommentReflower.
+  return Split(StringRef::npos, 0);
 }
 
 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
Index: lib/Format/CMakeLists.txt
===================================================================
--- lib/Format/CMakeLists.txt
+++ lib/Format/CMakeLists.txt
@@ -3,6 +3,7 @@
 add_clang_library(clangFormat
   AffectedRangeManager.cpp
   BreakableToken.cpp
+  CommentReflower.cpp
   Comments.cpp
   ContinuationIndenter.cpp
   Format.cpp
Index: lib/Format/CommentReflower.h
===================================================================
--- /dev/null
+++ lib/Format/CommentReflower.h
@@ -0,0 +1,47 @@
+//===--- CommentReflower.h - Reflow Comments --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares comment reflowing functionality.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTREFLOWER_H
+#define LLVM_CLANG_LIB_FORMAT_COMMENTREFLOWER_H
+
+#include "TokenAnalyzer.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Format/Format.h"
+#include "clang/Tooling/Core/Replacement.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+/// \brief A \c TokenAnalyzer pass that breaks and reflows comments.
+///
+/// Line comments, and block comments outside preprocessor directives, are no
+/// longer split by \c BreakableToken; this pass handles them instead.
+class CommentReflower : public TokenAnalyzer {
+public:
+  CommentReflower(const Environment &Env, const FormatStyle &Style);
+
+  /// \brief Computes the replacements that reflow the comments contained in
+  /// \p AnnotatedLines.
+  tooling::Replacements
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_FORMAT_COMMENTREFLOWER_H
Index: lib/Format/CommentReflower.cpp
===================================================================
--- /dev/null
+++ lib/Format/CommentReflower.cpp
@@ -0,0 +1,954 @@
+//===--- CommentReflower.cpp - Reflow Comments ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements comment reflowing functionality.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CommentReflower.h"
+#include "Comments.h"
+#include "Encoding.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "format-reflower"
+
+namespace clang {
+namespace format {
+
+namespace {
+
+// TODO(krasimir): Copied from Format. Possibly should do better?
+// Reports whether \p Text mostly uses CRLF line endings: twice the number of
+// carriage returns must exceed the number of line feeds.
+static bool inputUsesCRLF(StringRef Text) {
+  const size_t CarriageReturns = Text.count('\r');
+  const size_t LineFeeds = Text.count('\n');
+  return CarriageReturns * 2 > LineFeeds;
+}
+
+// Returns the line terminator selected by \p UsesCRLF.
+static StringRef newline(bool UsesCRLF) { return UsesCRLF ? "\r\n" : "\n"; }
+
+// Returns true for space, tab, vertical tab, form feed and carriage return.
+// Note that '\n' is not treated as blank.
+static bool IsBlank(char C) {
+  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
+}
+
+// TODO(krasimir): Copied from WhitespaceManager. Possibly should do better?
+//
+// Appends to \p Text whitespace that is \p Spaces columns wide, mixing tabs
+// and spaces as selected by \c Style.UseTab. \p WhitespaceStartColumn is the
+// column at which the whitespace begins.
+static void appendIndentText(std::string &Text, const FormatStyle &Style,
+                             unsigned IndentLevel, unsigned Spaces,
+                             unsigned WhitespaceStartColumn) {
+  const unsigned TabWidth = Style.TabWidth;
+  const bool StartsLine = WhitespaceStartColumn == 0;
+  // Emits \p Tabs tabs followed by however many spaces are still missing.
+  auto AppendTabsThenSpaces = [&](unsigned Tabs) {
+    Text.append(Tabs, '\t');
+    Text.append(Spaces - Tabs * TabWidth, ' ');
+  };
+
+  switch (Style.UseTab) {
+  case FormatStyle::UT_Never:
+    Text.append(Spaces, ' ');
+    break;
+  case FormatStyle::UT_Always: {
+    const unsigned ToNextTabStop = TabWidth - WhitespaceStartColumn % TabWidth;
+    // Indent with tabs only when there's at least one full tab.
+    if (ToNextTabStop + TabWidth <= Spaces) {
+      Text.push_back('\t');
+      Spaces -= ToNextTabStop;
+    }
+    Text.append(Spaces / TabWidth, '\t');
+    Text.append(Spaces % TabWidth, ' ');
+    break;
+  }
+  case FormatStyle::UT_ForIndentation:
+    if (StartsLine) {
+      const unsigned Wanted = IndentLevel * Style.IndentWidth;
+      // A line in a block comment may be indented less than the first one, so
+      // the tabbed part never exceeds the requested width.
+      const unsigned Indentation = Wanted > Spaces ? Spaces : Wanted;
+      AppendTabsThenSpaces(Indentation / TabWidth);
+    } else {
+      Text.append(Spaces, ' ');
+    }
+    break;
+  case FormatStyle::UT_ForContinuationAndIndentation:
+    if (StartsLine)
+      AppendTabsThenSpaces(Spaces / TabWidth);
+    else
+      Text.append(Spaces, ' ');
+    break;
+  }
+}
+
+// Checks if \p Pos is a starting position of a word in \p Text.
+bool startsWord(StringRef Text, size_t Pos) {
+  assert(Pos < Text.size() && "invalid position");
+  return !IsBlank(Text[Pos]) && (Pos == 0 || IsBlank(Text[Pos - 1]));
+}
+
+// Checks if \p Pos is an ending position of a word in \p Text.
+bool endsWord(StringRef Text, size_t Pos) {
+  assert(Pos < Text.size() && "invalid position");
+  return !IsBlank(Text[Pos]) &&
+         (Pos + 1 == Text.size() || IsBlank(Text[Pos + 1]));
+}
+
+/// \brief Returns the position of the first word start in \p Text starting from
+/// \p Pos, or StringRef::npos if no such position exists.
+///
+/// \p Pos may be at or past the end of \p Text; the result is then npos.
+size_t getFirstWordStartFrom(StringRef Text, size_t Pos) {
+  for (size_t i = Pos; i < Text.size(); ++i) {
+    if (startsWord(Text, i))
+      return i;
+  }
+  return StringRef::npos;
+}
+
+/// \brief Returns the position of the first word end in \p Text starting from
+/// \p Pos, or StringRef::npos if no such position exists.
+///
+/// Like \c getFirstWordStartFrom, this accepts a \p Pos at or past the end of
+/// \p Text and returns npos for it; callers pass the byte length of a fitting
+/// prefix, which can equal the size of the text.
+size_t getFirstWordEndFrom(StringRef Text, size_t Pos) {
+  for (size_t i = Pos; i < Text.size(); ++i) {
+    if (endsWord(Text, i))
+      return i;
+  }
+  return StringRef::npos;
+}
+
+/// \brief Returns the position of the last word end in \p Text that lies
+/// strictly before \p Pos, or StringRef::npos if no such position exists.
+///
+/// \p Pos is clamped to the size of \p Text.
+size_t getLastWordEndUpto(StringRef Text, size_t Pos) {
+  // Scan backwards so the search stops at the first hit.
+  for (size_t i = std::min(Pos, Text.size()); i > 0; --i) {
+    if (endsWord(Text, i - 1))
+      return i - 1;
+  }
+  return StringRef::npos;
+}
+
+/// \brief Returns the prefix of the comment line \p Text.
+///
+/// \p Type must be one of \c TT_BlockComment or \c TT_LineComment.
+/// \p BeginsSection indicates whether this line begins a comment section.
+StringRef getCommentLinePrefix(StringRef Text, TokenType Type,
+                               bool BeginsSection) {
+  if (Type == TT_BlockComment) {
+    if (BeginsSection) {
+      // The first line of a block comment carries the opening marker.
+      assert(Text.startswith("/*"));
+      return Text.substr(0, 2);
+    }
+    // A leading '*' counts as a prefix unless it opens the closing "*/".
+    const bool HasStar = Text.startswith("*") && !Text.startswith("*/");
+    return Text.substr(0, HasStar ? 1 : 0);
+  }
+  if (Type == TT_LineComment) {
+    if (Text.startswith("///") || Text.startswith("//!"))
+      return Text.substr(0, 3);
+    if (Text.startswith("//"))
+      return Text.substr(0, 2);
+  }
+  return Text.substr(0, 0);
+}
+
+/// \brief Returns the postfix of the comment line \p Text.
+///
+/// Only the last line of a block comment section has a postfix, namely the
+/// closing marker; for every other line the result is empty.
+/// \p Type must be one of \c TT_BlockComment or \c TT_LineComment.
+/// \p EndsSection indicates whether this line ends a comment section.
+StringRef getCommentLinePostfix(StringRef Text, TokenType Type,
+                                bool EndsSection) {
+  const bool HasClosingMarker = Type == TT_BlockComment && EndsSection;
+  if (!HasClosingMarker)
+    return Text.substr(Text.size());
+  assert(Text.endswith("*/"));
+  return Text.substr(Text.size() - 2);
+}
+
+/// \brief Names the consecutive parts of a \c CommentLine, in order.
+///
+/// Taking the line
+///   "int i; /* a comment */"
+/// as an example, the parts are:
+/// - pretext, the whitespace before the prefix: " "
+/// - prefix: "/*"
+/// - indent: " "
+/// - content: "a comment"
+/// - trail: " "
+/// - postfix: "*/"
+///
+/// \c CLP_Endcol follows the postfix and marks where the line ends;
+/// \c CLP_Size is the number of enumerators before it.
+enum CommentLinePart {
+  CLP_Pretext,
+  CLP_Prefix,
+  CLP_Indent,
+  CLP_Content,
+  CLP_Trail,
+  CLP_Postfix,
+  CLP_Endcol,
+  CLP_Size
+};
+
+/// \brief Represents a line of a comment.
+struct CommentLine {
+  CommentLine(const AnnotatedLine *TokenLine, const FormatToken *Token,
+              int LineOffset, StringRef Text, unsigned SectionId,
+              bool FirstInToken, encoding::Encoding Encoding, unsigned TabWidth,
+              unsigned IndentLevel, SourceLocation EraseLoc)
+      : BeginsSection(SectionId == 0), EndsSection(false),
+        FirstInToken(FirstInToken), SectionId(SectionId), TokenLine(TokenLine),
+        Token(Token), Text(Text), Offset(CLP_Size, 0), Column(CLP_Size, 0),
+        Encoding(Encoding), TabWidth(TabWidth), LineOffset(LineOffset),
+        IndentLevel(IndentLevel), EraseLoc(EraseLoc) {
+    if (Token->is(TT_BlockComment)) {
+      Type = TT_BlockComment;
+    } else {
+      assert(Token->is(TT_LineComment) && "Unsupported comment token type");
+      Type = TT_LineComment;
+    }
+  }
+
+  /// \brief \c true if this line begins a comment section.
+  bool BeginsSection;
+  /// \brief \c true if this line ends a comment section.
+  bool EndsSection;
+  /// \brief \c true if this line is the first line from its token.
+  bool FirstInToken;
+  /// \brief The index of this line in the comment section.
+  unsigned SectionId;
+  /// \brief The \c TokenLine of this line.
+  const AnnotatedLine *TokenLine;
+  /// \brief The \c Token of this line.
+  const FormatToken *Token;
+  /// \brief The text of this line.
+  std::string Text;
+  /// \brief Stores the begin offsets of the comment line parts.
+  // NOTE(review): the template arguments were lost from the posted patch;
+  // element type and inline size are reconstructed - confirm.
+  SmallVector<unsigned, CLP_Size> Offset;
+  /// \brief Stores the begin columns of the comment line parts.
+  SmallVector<unsigned, CLP_Size> Column;
+  /// \brief A prefix to use when adding a new comment line after this line.
+  std::string PrefixAfter;
+  /// \brief The \c Encoding used for this line.
+  encoding::Encoding Encoding;
+  /// \brief The \c TabWidth used for this line.
+  unsigned TabWidth;
+  /// \brief The offset from the beginning of the \c Token text to the beginning
+  /// of this line.
+  int LineOffset;
+  /// \brief The indentation level for this line.
+  unsigned IndentLevel;
+  /// \brief The type of this line.
+  /// Either \c TT_LineComment or \c TT_BlockComment.
+  TokenType Type;
+  /// \brief The original source location of this line used when erasing it.
+  SourceLocation EraseLoc;
+
+  /// \brief The original end source location of this line.
+  SourceLocation getEndLoc() const {
+    return getPretextLoc().getLocWithOffset(Text.size());
+  }
+  /// \brief The source location of the pretext of this line.
+  SourceLocation getPretextLoc() const {
+    return Token->getStartOfNonWhitespace().getLocWithOffset(LineOffset);
+  }
+
+  /// \brief Checks whether the token of this line has type \p TT.
+  bool is(TokenType TT) const { return Token->is(TT); }
+
+  /// \brief Returns the text of the part \p At of this line.
+  StringRef get(CommentLinePart At) const {
+    assert(At < CLP_Size);
+    return StringRef(Text).substr(Offset[At], Offset[next(At)] - Offset[At]);
+  }
+
+  /// \brief Returns the text from the start of part \p Begin up to, but not
+  /// including, the start of part \p End.
+  StringRef get(CommentLinePart Begin, CommentLinePart End) const {
+    assert(Begin < CLP_Size && End < CLP_Size);
+    assert(Offset[Begin] <= Offset[End] && "parts out of order");
+    // The second argument of substr is a length, not an end offset.
+    return StringRef(Text).substr(Offset[Begin], Offset[End] - Offset[Begin]);
+  }
+
+  /// \brief A prefix for the text appended to this line.
+  ///
+  /// TODO(krasimir): Possibly use "  " (two spaces) after "." (fullstop), as
+  /// for example vim and emacs does?
+  StringRef getAppendPrefix() const { return " "; }
+
+  /// \brief Returns the byte size of the part \p At of this line.
+  size_t getSize(CommentLinePart At) const {
+    assert(At < CLP_Size);
+    return Offset[next(At)] - Offset[At];
+  }
+
+  /// \brief Returns the start column of the part \p At of this line.
+  unsigned getColumn(CommentLinePart At) const { return Column[At]; }
+
+  /// \brief Returns the column width of the part \p At of this line.
+  unsigned getColumns(CommentLinePart At) const {
+    assert(At < CLP_Size);
+    return Column[next(At)] - Column[At];
+  }
+
+  /// \brief Dumps this line to an output stream. Useful for debug.
+  void dump(raw_ostream &os) const {
+    os.changeColor(raw_ostream::CYAN);
+    os << (BeginsSection ? 'b' : '-') << (EndsSection ? 'e' : '-')
+       << (is(TT_LineComment) ? '/' : '*') << llvm::format("%3d:", SectionId)
+       << std::string(getColumn(CLP_Pretext), '.');
+    os.changeColor(raw_ostream::YELLOW, /*Bold=*/false, /*BG=*/true);
+    os << get(CLP_Pretext);
+    os.changeColor(raw_ostream::RED, true, false);
+    os << get(CLP_Prefix);
+    os.changeColor(raw_ostream::RED, false, true);
+    os << get(CLP_Indent);
+    os.resetColor();
+    os << get(CLP_Content);
+    os.changeColor(raw_ostream::GREEN, false, true);
+    os << get(CLP_Trail);
+    os.changeColor(raw_ostream::GREEN);
+    os << get(CLP_Postfix) << "\n";
+    os.resetColor();
+  }
+
+  /// \brief Initializes the comment line parts.
+  void initParts();
+
+  // Sets the part \p At of this line to \p Piece.
+  void set(CommentLinePart At, StringRef Piece);
+
+private:
+  // Shifts the begin offsets of \p From and all later parts by \p Delta bytes.
+  void translateOffsets(CommentLinePart From, int Delta) {
+    for (int Id = From; Id < CLP_Size; ++Id) {
+      assert(Delta >= 0 || static_cast<int>(Offset[Id]) >= -Delta);
+      Offset[Id] += Delta;
+    }
+  }
+
+  // Shifts the begin columns of \p From and all later parts by \p Delta.
+  void translateColumns(CommentLinePart From, int Delta) {
+    for (int Id = From; Id < CLP_Size; ++Id) {
+      assert(Delta >= 0 || static_cast<int>(Column[Id]) >= -Delta);
+      Column[Id] += Delta;
+    }
+  }
+
+  // Shifts \p From and all later parts by the byte size and the column width
+  // of \p Delta, measured from the current column of \p From.
+  void translate(CommentLinePart From, StringRef Delta) {
+    translateOffsets(From, Delta.size());
+    translateColumns(From, encoding::columnWidthWithTabs(Delta, Column[From],
+                                                         TabWidth, Encoding));
+  }
+
+  // Returns the part following \p Part; \p Part must precede \c CLP_Endcol.
+  static CommentLinePart next(CommentLinePart Part) {
+    assert(Part < CLP_Endcol);
+    return static_cast<CommentLinePart>(Part + 1);
+  }
+};
+
+void CommentLine::set(CommentLinePart At, StringRef Piece) {
+  assert(At < CLP_Size);
+  int OldSize = getSize(At);
+  int OldColumns = getColumns(At);
+  int NewSize = Piece.size();
+  int NewColumns =
+      encoding::columnWidthWithTabs(Piece, Column[At], TabWidth, Encoding);
+  Text.replace(Text.begin() + Offset[At], Text.begin() + Offset[next(At)],
+               Piece.begin(), Piece.end());
+  // Every later part moves by the change in size and in width of this part.
+  translateOffsets(next(At), NewSize - OldSize);
+  translateColumns(next(At), NewColumns - OldColumns);
+}
+
+void CommentLine::initParts() {
+  // Only the first line of a token starts at the token's column; the other
+  // lines of the token start at column 0.
+  unsigned BeforeColumns = FirstInToken ? Token->OriginalColumn : 0;
+  translateColumns(CLP_Pretext, BeforeColumns);
+
+  // Peel the parts off the text: pretext and prefix from the front, postfix
+  // and trail from the back, then the indent; what remains is the content.
+  StringRef Piece(Text);
+  StringRef Pretext = Piece.substr(0, Piece.size() - Piece.ltrim().size());
+  Piece = Piece.substr(Pretext.size());
+  StringRef Prefix = getCommentLinePrefix(Piece, Type, BeginsSection);
+  Piece = Piece.substr(Prefix.size());
+  StringRef Postfix = getCommentLinePostfix(Piece, Type, EndsSection);
+  Piece = Piece.substr(0, Piece.size() - Postfix.size());
+  StringRef Trail = Piece.substr(Piece.rtrim().size());
+  Piece = Piece.substr(0, Piece.size() - Trail.size());
+  StringRef Indent = Piece.substr(0, Piece.size() - Piece.ltrim().size());
+  Piece = Piece.substr(Indent.size());
+  StringRef Content = Piece;
+
+  // Each part begins where the previous one ends.
+  translate(CLP_Prefix, Pretext);
+  translate(CLP_Indent, Prefix);
+  translate(CLP_Content, Indent);
+  translate(CLP_Trail, Content);
+  translate(CLP_Postfix, Trail);
+  translate(CLP_Endcol, Postfix);
+
+  assert(get(CLP_Pretext) == Pretext);
+  assert(get(CLP_Prefix) == Prefix);
+  assert(get(CLP_Indent) == Indent);
+  assert(get(CLP_Content) == Content);
+  assert(get(CLP_Trail) == Trail);
+  assert(get(CLP_Postfix) == Postfix);
+
+  PrefixAfter = Prefix.str();
+  if (BeginsSection && is(TT_BlockComment)) {
+    assert(Prefix == "/*");
+    // Lines added after the opening line of a block comment do not repeat
+    // the opening marker.
+    if (Token->Previous && Token->NewlinesBefore == 0) {
+      PrefixAfter = "  ";
+    } else {
+      PrefixAfter = " *";
+    }
+  }
+
+  // When another token follows the closing marker on the same line, the line
+  // extends up to the column of that token.
+  if (EndsSection && is(TT_BlockComment) && Token->Next &&
+      Token->Next->NewlinesBefore == 0) {
+    Column[CLP_Endcol] = Token->Next->OriginalColumn;
+  }
+}
+
+/// \brief Extracts the comment lines from \p AnnotatedLines and appends them to
+/// \p CommentLines.
+///
+/// Comment tokens are split into their physical lines. Comment lines found in
+/// the children of a token are appended to \p CommentLines as soon as that
+/// token is visited; the lines found directly in \p AnnotatedLines are
+/// appended at the end, after their parts have been initialized.
+static void appendCommentLines(ArrayRef<AnnotatedLine *> AnnotatedLines,
+                               std::vector<CommentLine> *CommentLines,
+                               encoding::Encoding Encoding, unsigned TabWidth,
+                               bool UsesCRLF) {
+  const StringRef Newline = newline(UsesCRLF);
+  std::vector<CommentLine> LocalLines;
+  bool PreviousTokenWasLineComment = false;
+  // Index of the next line comment within its section of consecutive line
+  // comments.
+  unsigned LineCommentId = 0;
+  for (const AnnotatedLine *Line : AnnotatedLines) {
+    const unsigned IndentLevel = Line->Level;
+    for (const FormatToken *Token = Line->First; Token; Token = Token->Next) {
+      appendCommentLines(Token->Children, CommentLines, Encoding, TabWidth,
+                         UsesCRLF);
+      const bool IsBlockComment = Token->is(TT_BlockComment);
+      if (!IsBlockComment && !Token->is(TT_LineComment)) {
+        PreviousTokenWasLineComment = false;
+        continue;
+      }
+
+      // NOTE(review): the inline size of this vector was lost from the posted
+      // patch; it does not affect behavior.
+      SmallVector<StringRef, 16> Lines;
+      Token->TokenText.split(Lines, Newline);
+      size_t LineOffset = 0;
+      const SourceLocation FirstEraseLoc = Token->WhitespaceRange.getBegin();
+      if (IsBlockComment) {
+        // The first line is erased together with the whitespace before the
+        // token, every other line starting from the end of the previous line.
+        SourceLocation LastEndLoc = Token->getStartOfNonWhitespace();
+        for (size_t LineId = 0; LineId < Lines.size(); ++LineId) {
+          LocalLines.push_back(CommentLine(
+              Line, Token, LineOffset, Lines[LineId], LineId,
+              /*FirstInToken=*/(LineId == 0), Encoding, TabWidth, IndentLevel,
+              /*EraseLoc=*/(LineId == 0 ? FirstEraseLoc : LastEndLoc)));
+          LineOffset += Lines[LineId].size();
+          LastEndLoc =
+              Token->getStartOfNonWhitespace().getLocWithOffset(LineOffset);
+          LineOffset += Newline.size();
+        }
+        PreviousTokenWasLineComment = false;
+      } else {
+        // A line comment starts a new section unless it directly follows
+        // another line comment without an empty line in between.
+        if (!PreviousTokenWasLineComment || Token->NewlinesBefore > 1) {
+          LineCommentId = 0;
+        }
+        for (size_t LineId = 0; LineId < Lines.size(); ++LineId) {
+          LocalLines.push_back(CommentLine(
+              Line, Token, LineOffset, Lines[LineId], LineCommentId + LineId,
+              /*FirstInToken=*/(LineId == 0), Encoding, TabWidth, IndentLevel,
+              FirstEraseLoc));
+          LineOffset += Lines[LineId].size() + Newline.size();
+        }
+        LineCommentId += Lines.size();
+        PreviousTokenWasLineComment = true;
+      }
+    }
+  }
+  // A line ends its section when it is the last line or when the next line
+  // begins a section. The parts depend on this, so they are initialized here.
+  for (size_t i = 0; i < LocalLines.size(); ++i) {
+    CommentLine &Line = LocalLines[i];
+    if (i + 1 == LocalLines.size() || LocalLines[i + 1].BeginsSection) {
+      Line.EndsSection = true;
+    }
+    Line.initParts();
+  }
+  CommentLines->insert(CommentLines->end(), LocalLines.begin(),
+                       LocalLines.end());
+}
+
+/// \brief Represents a replacement of a comment line.
+///
+/// A replacement could be in one of three states:
+/// - either the original line is preserved (IsOriginal == true)
+/// - or the original line is erased (isErased() == true)
+/// - or the original line is replaced by \c Lines.
+struct CommentLineReplacement {
+  explicit CommentLineReplacement(const CommentLine &OriginalLine)
+      : OriginalLine(OriginalLine), IsOriginal(true), Lines{OriginalLine} {}
+
+  /// \brief Returns the last of the replacing lines; must not be erased.
+  const CommentLine &getBack() const {
+    assert(!Lines.empty());
+    return Lines.back();
+  }
+
+  bool isErased() const { return Lines.empty(); }
+
+  /// \brief Overwrites the last replacing line with \p Back.
+  void setBack(const CommentLine &Back) {
+    assert(!Lines.empty());
+    Lines.back() = Back;
+    IsOriginal = false;
+  }
+
+  /// \brief Appends \p Back as a new last replacing line.
+  void addBack(const CommentLine &Back) {
+    Lines.push_back(Back);
+    IsOriginal = false;
+  }
+
+  /// \brief Removes the last replacing line.
+  void popBack() {
+    assert(!Lines.empty());
+    Lines.pop_back();
+    IsOriginal = false;
+  }
+
+  void dump(raw_ostream &os) const {
+    os.changeColor(raw_ostream::CYAN);
+    os << (IsOriginal ? 'O' : (isErased() ? 'E' : 'C'));
+    OriginalLine.dump(os);
+    if (!IsOriginal) {
+      for (const CommentLine &Line : Lines) {
+        os.changeColor(raw_ostream::CYAN);
+        os << '|';
+        Line.dump(os);
+      }
+    }
+    os.changeColor(raw_ostream::CYAN);
+    os << "'========\n";
+    os.resetColor();
+  }
+
+  CommentLine OriginalLine;
+  bool IsOriginal;
+  std::vector<CommentLine> Lines;
+};
+
+/// \brief Builds a list of \c CommentLineReplacements from a list of \c
+/// CommentLines.
+///
+/// The replacer walks the lines with two cursors: the current line, which is
+/// broken when it exceeds the column limit, and the next line, from which
+/// words are pulled up into the current line while a reflow is in progress.
+class CommentLinesReplacer {
+public:
+  CommentLinesReplacer(const std::vector<CommentLine> &CommentLines,
+                       const FormatStyle &Style, encoding::Encoding Encoding,
+                       AffectedRangeManager *AffectedRangeMgr)
+      : Style(Style), Encoding(Encoding), AffectedRangeMgr(AffectedRangeMgr),
+        CommentPragmasRegex(Style.CommentPragmas) {
+    Replacements.reserve(CommentLines.size());
+    for (const CommentLine &Line : CommentLines) {
+      Replacements.emplace_back(Line);
+    }
+  }
+
+  /// \brief Returns the current \c CommentLine.
+  const CommentLine &getCurrent() const {
+    return Replacements[CurrentId].getBack();
+  }
+
+  /// \brief Returns the next \c CommentLine.
+  const CommentLine &getNext() const { return Replacements[NextId].getBack(); }
+
+  /// \brief Checks if \p Line fits the column limit.
+  bool fits(const CommentLine &Line) const {
+    return Line.getColumn(CLP_Endcol) <= Style.ColumnLimit;
+  }
+
+  /// \brief Sets the current comment line to \p Line.
+  void setCurrent(const CommentLine &Line) {
+    Replacements[CurrentId].setBack(Line);
+  }
+
+  /// \brief Adds \p Line after the current comment line and makes \p Line the
+  /// current.
+  void addAfterCurrent(CommentLine Line) {
+    Replacements[CurrentId].addBack(Line);
+  }
+
+  /// \brief Finishes processing the current comment line.
+  void finishCurrent() {
+    CurrentId = NextId;
+    ++NextId;
+  }
+
+  /// \brief Dumps a diagnostic message about the replacement process.
+  void info(const std::string &where, const std::string &message) {
+    DEBUG(llvm::errs() << where << ": " << message << "\n");
+  }
+
+  /// \brief Checks if \p Line must be kept from breaking and reflowing.
+  bool mustKeep(const CommentLine &Line) {
+    // Guard every use of TokenLine; it is checked for null further down, so
+    // it must not be dereferenced unconditionally here.
+    const AnnotatedLine *TokenLine = Line.TokenLine;
+    return (TokenLine && TokenLine->InPPDirective &&
+            Line.is(TT_BlockComment)) ||
+           CommentPragmasRegex.match(Line.get(CLP_Indent, CLP_Endcol)) ||
+           (TokenLine && TokenLine->Type == LT_ImportStatement) ||
+           (Line.Token->Previous &&
+            Line.Token->Previous->is(TT_ImplicitStringLiteral)) ||
+           (Line.Token->Next && Line.Token->Next->NewlinesBefore == 0) ||
+           (Line.BeginsSection && Line.get(CLP_Indent) == "") ||
+           Line.get(CLP_Content) == "clang-format on" ||
+           Line.get(CLP_Content) == "clang-format off" ||
+           !AffectedRangeMgr->affectsCharSourceRange(
+               CharSourceRange::getCharRange(Line.EraseLoc, Line.getEndLoc()));
+  }
+
+  /// \brief Returns the length of the piece of content of \p Line that is
+  /// either the largest fitting piece taking \p AvailableColumns, or if that is
+  /// not possible the smallest piece that overfits.
+  size_t contentFitLength(const CommentLine &Line, unsigned AvailableColumns) {
+    StringRef Content = Line.get(CLP_Content);
+    size_t Pos = encoding::lengthOfColumnWidthWithTabs(
+        Content, Line.getColumn(CLP_Content), Style.TabWidth, Encoding,
+        AvailableColumns);
+    size_t LastEnd = getLastWordEndUpto(Content, Pos);
+    if (LastEnd == StringRef::npos) {
+      // No word ends within the available columns: overfit the first word.
+      LastEnd = getFirstWordEndFrom(Content, Pos);
+    }
+    assert(LastEnd != StringRef::npos);
+    return LastEnd + 1;
+  }
+
+  /// \brief Tries to shrink the whitespace around the column limit in \p Line.
+  CommentLine shrink(const CommentLine &Line) {
+    StringRef Content = Line.get(CLP_Content);
+    if (fits(Line)) {
+      info("shrink", "line fits");
+      return Line;
+    }
+    if (Line.getColumn(CLP_Content) >= Style.ColumnLimit) {
+      info("shrink", "content starts at or after the column limit");
+      return Line;
+    }
+    if (Line.getColumn(CLP_Trail) <= Style.ColumnLimit) {
+      info("shrink", "column limit is at the trail of a line");
+      CommentLine Shrunk = Line;
+      Shrunk.set(CLP_Trail, Line.get(CLP_Postfix).empty() ? "" : " ");
+      return Shrunk;
+    }
+    unsigned OccupiedColumns =
+        Line.getColumn(CLP_Content) + Line.getColumns(CLP_Postfix);
+    unsigned AvailableColumns = columnsLeft(OccupiedColumns);
+    size_t FitLength = contentFitLength(Line, AvailableColumns);
+    if (FitLength == Content.size()) {
+      info("shrink", "current cannot be cut");
+      return Line;
+    }
+    // Collapse the whitespace following the fitting piece into a single
+    // append prefix.
+    std::string ShrunkContent = Content.substr(0, FitLength).str();
+    ShrunkContent += Line.getAppendPrefix();
+    ShrunkContent += Content.substr(getFirstWordStartFrom(Content, FitLength));
+    CommentLine Shrunk = Line;
+    Shrunk.set(CLP_Content, ShrunkContent);
+    return Shrunk;
+  }
+
+  /// \brief Tries to break the current line.
+  void breakCurrent() {
+    info("breakCurrent", "trying to break current");
+    const CommentLine &Current = getCurrent();
+    if (fits(Current)) {
+      info("breakCurrent", "current fits");
+      InProgress = false;
+      finishCurrent();
+      return;
+    }
+    CommentLine Shrunk = shrink(Current);
+    if (fits(Shrunk)) {
+      info("breakCurrent", "shrunk fits");
+      InProgress = true;
+      setCurrent(Shrunk);
+      return;
+    }
+    // Determine where to break the current line.
+    StringRef Content = Current.get(CLP_Content);
+    unsigned OccupiedColumns = Current.getColumn(CLP_Content);
+    unsigned AvailableColumns = columnsLeft(OccupiedColumns);
+    size_t FitLength = contentFitLength(Current, AvailableColumns);
+    if (FitLength == Content.size()) {
+      info("breakCurrent", "current has no word ending upto available columns");
+      if (Current.is(TT_BlockComment) && Current.EndsSection) {
+        info("breakCurrent", "breaking the postfix to a new line");
+        CommentLine Last = Current;
+        Last.set(CLP_Trail, "");
+        Last.set(CLP_Postfix, "");
+        Last.EndsSection = false;
+        CommentLine Next = Current;
+        Next.set(CLP_Prefix, "");
+        Next.set(CLP_Indent, "");
+        Next.set(CLP_Content, "");
+        Next.set(CLP_Trail, " ");
+        setCurrent(Last);
+        addAfterCurrent(Next);
+      }
+      InProgress = false;
+      finishCurrent();
+      return;
+    }
+    size_t NextOffset = getFirstWordStartFrom(Content, FitLength);
+    if (NextOffset == StringRef::npos) {
+      info("breakCurrent", "current has no content to break");
+      InProgress = false;
+      finishCurrent();
+      // Without this return the code below would break the line that has
+      // just been finished, using an invalid offset.
+      return;
+    }
+    info("breakCurrent", "breaking current");
+    InProgress = true;
+    CommentLine Last = Current;
+    Last.set(CLP_Content, Content.substr(0, FitLength));
+    Last.set(CLP_Trail, "");
+    Last.set(CLP_Postfix, "");
+    Last.EndsSection = false;
+    CommentLine Next = Current;
+    Next.set(CLP_Content, Content.substr(NextOffset));
+    Next.set(CLP_Prefix, Current.PrefixAfter);
+    Next.BeginsSection = false;
+    setCurrent(Last);
+    addAfterCurrent(Next);
+  }
+
+  /// \brief Tries to take a piece of the next comment line and append it to the
+  /// current comment line.
+  void takeFromNext() {
+    const CommentLine &Current = getCurrent();
+    const CommentLine &Next = getNext();
+    if (Current.getColumn(CLP_Content) != Next.getColumn(CLP_Content)) {
+      info("takeFromNext", "current and next have different indent");
+      InProgress = false;
+      return;
+    }
+    unsigned TakenColumns = Current.getColumn(CLP_Trail) +
+                            Current.getColumns(CLP_Postfix) +
+                            Current.getAppendPrefix().size();
+    if (TakenColumns >= Style.ColumnLimit) {
+      info("takeFromNext", "current has no available columns");
+      InProgress = false;
+      return;
+    }
+    unsigned AvailableColumns = Style.ColumnLimit - TakenColumns;
+    unsigned Threshold =
+        std::min(AvailableColumns, Next.getColumns(CLP_Content));
+    StringRef NextContent = Next.get(CLP_Content);
+
+    size_t CutOffset = getLastWordEndUpto(
+        NextContent, encoding::lengthOfColumnWidthWithTabs(
+                         NextContent, Next.getColumn(CLP_Content),
+                         Style.TabWidth, Encoding, Threshold));
+    if (CutOffset == StringRef::npos) {
+      info("takeFromNext", "no part of next would fit the available columns");
+      InProgress = false;
+      return;
+    }
+    size_t NextOffset = getFirstWordStartFrom(NextContent, CutOffset + 1);
+    // Nothing is left of the next line when no word starts after the cut.
+    bool EraseNext = (NextOffset == StringRef::npos);
+
+    // Append Piece to Current.
+    StringRef Piece = Next.get(CLP_Content).substr(0, CutOffset + 1);
+    std::string NewContent = Current.get(CLP_Content).str();
+    NewContent += Current.getAppendPrefix();
+    NewContent += Piece;
+    CommentLine NewCurrent = Current;
+    NewCurrent.set(CLP_Content, NewContent);
+    if (EraseNext) {
+      // The current line takes over the ending of the erased line.
+      NewCurrent.set(CLP_Trail, Next.get(CLP_Trail));
+      NewCurrent.set(CLP_Postfix, Next.get(CLP_Postfix));
+      NewCurrent.EndsSection = Next.EndsSection;
+    }
+    setCurrent(NewCurrent);
+
+    // Erase (from) Next.
+    if (EraseNext) {
+      info("takeFromNext", "erasing next");
+      Replacements[NextId].popBack();
+      return;
+    }
+    info("takeFromNext", "erasing a piece from next");
+    CommentLine NewNext = Next;
+    StringRef NewNextContent = Next.get(CLP_Content).substr(NextOffset);
+    NewNext.set(CLP_Content, NewNextContent);
+    Replacements[NextId].setBack(NewNext);
+    finishCurrent();
+  }
+
+  /// \brief Reflows the comment lines.
+  ///
+  /// Does nothing when the style has no column limit.
+  void reflow() {
+    if (Style.ColumnLimit == 0)
+      return;
+    while (CurrentId < Replacements.size()) {
+      if (Replacements[CurrentId].isErased()) {
+        info("reflow", "current is erased");
+        finishCurrent();
+        continue;
+      }
+      const CommentLine &Current = getCurrent();
+      info("reflow", "reflowing current");
+      DEBUG(Current.dump(llvm::errs()));
+      if (Current.get(CLP_Content) == "clang-format off") {
+        info("reflow", "current is clang-format off");
+        ClangFormatOff = true;
+        InProgress = false;
+        finishCurrent();
+        continue;
+      }
+      if (Current.get(CLP_Content) == "clang-format on") {
+        info("reflow", "current is clang-format on");
+        ClangFormatOff = false;
+        InProgress = false;
+        finishCurrent();
+        continue;
+      }
+      if (ClangFormatOff) {
+        InProgress = false;
+        finishCurrent();
+        continue;
+      }
+      if (mustKeep(Current)) {
+        info("reflow", "current must be kept");
+        InProgress = false;
+        finishCurrent();
+        continue;
+      }
+      unsigned LastColumn = Current.getColumn(CLP_Endcol);
+      if (LastColumn > Style.ColumnLimit) {
+        info("reflow", "current last column is after the column limit");
+        breakCurrent();
+        continue;
+      }
+      if (Current.EndsSection) {
+        info("reflow", "current ends section");
+        finishCurrent();
+        continue;
+      }
+      assert(fits(Current));
+      info("reflow", "current fits in the column limit");
+      // Words are pulled up from the next line only while a reflow that was
+      // started by shrinking or breaking a line is in progress.
+      if (InProgress && NextId < Replacements.size()) {
+        if (Replacements[NextId].isErased()) {
+          info("reflow", "next is erased");
+          ++NextId;
+          continue;
+        }
+        const CommentLine &Next = getNext();
+        info("reflow", "reflowing next");
+        DEBUG(Next.dump(llvm::errs()));
+        if (mustKeep(Next)) {
+          info("reflow", "next must be kept");
+          finishCurrent();
+          InProgress = false;
+          continue;
+        }
+        takeFromNext();
+        continue;
+      }
+      finishCurrent();
+    }
+  }
+
+  /// \brief Returns a copy of the replacements computed so far.
+  std::vector<CommentLineReplacement> getReplacements() const {
+    return Replacements;
+  }
+
+private:
+  /// \brief Returns the number of columns left before the column limit once
+  /// \p OccupiedColumns are taken, or 0 if the limit is already exceeded.
+  unsigned columnsLeft(unsigned OccupiedColumns) const {
+    return Style.ColumnLimit > OccupiedColumns
+               ? Style.ColumnLimit - OccupiedColumns
+               : 0;
+  }
+
+  const FormatStyle &Style;
+  encoding::Encoding Encoding;
+  AffectedRangeManager *AffectedRangeMgr;
+  llvm::Regex CommentPragmasRegex;
+  /// \brief \c true between a "clang-format off" and a "clang-format on" line.
+  bool ClangFormatOff = false;
+  /// \brief \c true while a reflow started by shrinking or breaking a line may
+  /// continue into the following lines.
+  bool InProgress = false;
+  size_t CurrentId = 0;
+  size_t NextId = 1;
+  std::vector<CommentLineReplacement> Replacements;
+};
+
+/// \brief Populates \p Replaces with \p CommentReplacements.
+///
+/// Unchanged lines produce nothing, erased lines are removed starting from
+/// their erase location, and changed lines are replaced by the newline-joined
+/// text of their replacing lines. Errors reported while adding a replacement
+/// are printed to \c llvm::errs().
+static void populateCommentLineReplacements(
+    const std::vector<CommentLineReplacement> &CommentReplacements,
+    tooling::Replacements *Replaces, const SourceManager &SM,
+    const FormatStyle &Style, bool UsesCRLF) {
+  for (const CommentLineReplacement &Replacement : CommentReplacements) {
+    if (Replacement.IsOriginal) {
+      continue;
+    }
+    if (Replacement.isErased()) {
+      SourceLocation BeginLoc = Replacement.OriginalLine.EraseLoc;
+      auto EraseErr = Replaces->add(tooling::Replacement(
+          SM, CharSourceRange::getCharRange(
+                  BeginLoc, Replacement.OriginalLine.getEndLoc()),
+          ""));
+      if (EraseErr) {
+        llvm::errs() << llvm::toString(std::move(EraseErr));
+      }
+      continue;
+    }
+    std::string Text;
+    for (size_t i = 0; i < Replacement.Lines.size(); ++i) {
+      // The first line keeps the whitespace already in front of it; added
+      // lines are indented up to the column of their pretext.
+      if (i > 0) {
+        appendIndentText(Text, Style, Replacement.OriginalLine.IndentLevel,
+                         Replacement.Lines[i].getColumn(CLP_Pretext),
+                         /*WhitespaceStartColumn=*/0);
+      }
+      Text += Replacement.Lines[i].Text;
+      if (i + 1 < Replacement.Lines.size()) {
+        Text += newline(UsesCRLF);
+      }
+    }
+    auto ReplaceErr = Replaces->add(tooling::Replacement(
+        SM,
+        CharSourceRange::getCharRange(Replacement.OriginalLine.getPretextLoc(),
+                                      Replacement.OriginalLine.getEndLoc()),
+        Text));
+    if (ReplaceErr) {
+      llvm::errs() << llvm::toString(std::move(ReplaceErr));
+    }
+  }
+}
+
+} // end anonymous namespace
+
+CommentReflower::CommentReflower(const Environment &Env,
+                                 const FormatStyle &Style)
+    : TokenAnalyzer(Env, Style) {}
+
+// Collects the comment lines of \p AnnotatedLines, reflows them and returns
+// the resulting replacements.
+tooling::Replacements
+CommentReflower::analyze(TokenAnnotator &Annotator,
+                         SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                         FormatTokenLexer &Tokens) {
+  tooling::Replacements Replaces;
+  std::vector<CommentLine> CommentLines;
+  bool UsesCRLF =
+      inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID()));
+  appendCommentLines(AnnotatedLines, &CommentLines, Encoding, Style.TabWidth,
+                     UsesCRLF);
+  DEBUG(if (!CommentLines.empty()) {
+    llvm::errs() << "CommentLines:\n";
+    for (const CommentLine &Line : CommentLines) {
+      Line.dump(llvm::errs());
+    }
+    llvm::errs() << "\n";
+  });
+
+  CommentLinesReplacer Replacer(CommentLines, Style, Encoding,
+                                &AffectedRangeMgr);
+  Replacer.reflow();
+  std::vector<CommentLineReplacement> LineReplacements =
+      Replacer.getReplacements();
+
+  DEBUG(if (!LineReplacements.empty()) {
+    llvm::errs() << "Replacements:\n";
+    for (const CommentLineReplacement &Replacement : LineReplacements) {
+      Replacement.dump(llvm::errs());
+    }
+    llvm::errs() << "\n";
+  });
+
+  populateCommentLineReplacements(LineReplacements, &Replaces,
+                                  Env.getSourceManager(), Style, UsesCRLF);
+
+  return Replaces;
+}
+
+} // end namespace format
+} // end namespace clang
Index: lib/Format/Encoding.h
===================================================================
--- lib/Format/Encoding.h
+++ lib/Format/Encoding.h
@@ -104,6 +104,48 @@
   }
 }
 
+/// \brief Returns the length of the longest prefix of \p Text that fits in
+/// \p Width columns.
+///
+/// The \p Text starts at \p StartColumn, has \p Encoding and uses \p TabWidth
+/// columns per tab.
+inline size_t lengthOfColumnWidthWithTabs(StringRef Text,
+                                          unsigned StartColumn,
+                                          unsigned TabWidth,
+                                          encoding::Encoding Encoding,
+                                          unsigned Width) {
+  size_t End = Text.size();
+  std::vector<size_t> Offsets; // Byte offsets at which a code point starts.
+  for (size_t Offset = 0; Offset < End;
+       Offset += getCodePointNumBytes(Text[Offset], Encoding)) {
+    Offsets.push_back(Offset);
+  }
+  Offsets.push_back(End); // Sentinel: the prefix that is all of Text.
+  auto ComputeWidth = [&](size_t At) {
+    return columnWidthWithTabs(Text.substr(0, Offsets[At]), StartColumn,
+                               TabWidth, Encoding);
+  };
+  size_t Left = 0;
+  if (ComputeWidth(Left) > Width) {
+    return 0; // Not even the empty prefix fits.
+  }
+  size_t Right = Offsets.size() - 1;
+  if (ComputeWidth(Right) <= Width) {
+    return Text.size(); // Everything fits.
+  }
+  while (Left + 1 < Right) { // Binary search: Left fits, Right does not.
+    assert(ComputeWidth(Left) <= Width && "binary search left invariant");
+    assert(ComputeWidth(Right) > Width && "binary search right invariant");
+    size_t Mid = (Left + Right) / 2;
+    if (ComputeWidth(Mid) <= Width) {
+      Left = Mid;
+    } else {
+      Right = Mid;
+    }
+  }
+  return Offsets[Left];
+}
+
 inline bool isOctDigit(char c) { return '0' <= c && c <= '7'; }
 
 inline bool isHexDigit(char c) {
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -15,6 +15,7 @@
 
 #include "clang/Format/Format.h"
 #include "AffectedRangeManager.h"
+#include "CommentReflower.h"
 #include "ContinuationIndenter.h"
 #include "FormatTokenLexer.h"
 #include "SortJavaScriptImports.h"
@@ -1745,6 +1746,23 @@
   }
 
   Formatter Format(*Env, Expanded, IncompleteFormat);
+
+  if (Style.ReflowComments) {
+    tooling::Replacements FormatReplaces = Format.process();
+    auto FormattedCode = tooling::applyAllReplacements(Code, FormatReplaces);
+    if (!FormattedCode) {
+      // An llvm::Expected holding an error must have that error handled before
+      // it is destroyed; drop it and keep the formatting-only replacements.
+      llvm::consumeError(FormattedCode.takeError());
+      return FormatReplaces;
+    }
+    std::vector<tooling::Range> NewRanges =
+        tooling::calculateRangesAfterReplacements(FormatReplaces, Ranges);
+    auto FormattedEnv = Environment::CreateVirtualEnvironment(
+        *FormattedCode, FileName, NewRanges);
+    CommentReflower Reflow(*FormattedEnv, Style);
+    return FormatReplaces.merge(Reflow.process());
+  }
   return Format.process();
 }
 
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1636,9 +1636,10 @@
 
   EXPECT_EQ("/* some comment\n"
             " * a comment\n"
-            "* that we break\n"
+            " * that we\n"
+            " * break\n"
             " * another comment\n"
-            "* we have to break\n"
+            " * we have to break\n"
             "* a left comment\n"
             " */",
             format(" /* some comment\n"
@@ -1783,6 +1784,378 @@
             " 0x00, 0x00, 0x00, 0x00}; // comment\n");
 }
 
+// Checks breaking of over-long comment lines and reflowing of the overflow
+// into the following comment lines, using a 20-column limit throughout.
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n",
+            format("// long long long long\n"
+                   "// long\n", getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n"
+            "// long_long",
+            format("// long long long long\n"
+                   "// long long_long", getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// long_long_long\n",
+            format("// long long long long\n"
+                   "// long_long_long\n", getLLVMStyleWithColumns(20)));
+
+  // Don't add a newline if the next line has no newline.
+  EXPECT_EQ("// long long long\n"
+            "// long long",
+            format("// long long long long\n"
+                   "// long", getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat(
+      "// long\n"
+      "// long long long lo\n"
+      "// long long long lo\n"
+      "// long", getLLVMStyleWithColumns(20));
+
+  // Keep prefixes while reflowing.
+  EXPECT_EQ("/// long long long\n"
+            "/// long long\n",
+            format(
+                "/// long long long long\n"
+                "/// long\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+            "//! long long\n",
+            format(
+                "//! long long long long\n"
+                "//! long\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            " * long long */",
+            format("/* long long long long\n"
+                   " * long */", getLLVMStyleWithColumns(20)));
+
+  // Keep indentation while reflowing.
+  EXPECT_EQ(
+      "/* long long long\n"
+      " * long long long\n"
+      " */",
+      format("/* long long long long\n"
+             " * long long\n"
+             " */", getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ(
+      "/* long long long\n"
+      " * long long long */",
+      format("/* long long long long\n"
+             " * long\n"
+             " * long */",
+             getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a new line if it doesn't
+  // fit.
+  EXPECT_EQ(
+      "/* long long long\n"
+      " * long long longg\n"
+      " */",
+      format("/* long long long long\n"
+             " * long\n"
+             " * longg */",
+             getLLVMStyleWithColumns(20)));
+
+  // Break single line block comments that are first in the line with ' *'
+  // decoration.
+  EXPECT_EQ("/* long long long\n"
+            " * long */",
+            format("/* long long long long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break single line block comments that are not first in the line with ' '
+  // decoration.
+  EXPECT_EQ("int i; /* long long\n"
+            " long */",
+            format("int i; /* long long long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow a line that just goes over the column limit.
+  EXPECT_EQ(
+      "// long long long\n"
+      "// lon long",
+      format("// long long long lon\n"
+             "// long", getLLVMStyleWithColumns(20)));
+
+  // Stop reflowing early.
+  EXPECT_EQ(
+      "// long long long\n"
+      "// long\n"
+      "// long long\n"
+      "// long",
+      format("// long long long long\n"
+             "// long long\n"
+             "// long",
+             getLLVMStyleWithColumns(20)));
+
+  // Reflow into the last part of a really long line that has been broken into
+  // multiple lines.
+  EXPECT_EQ(
+      "// long long long\n"
+      "// long long long\n"
+      "// long long long\n",
+      format("// long long long long long long long long\n"
+             "// long\n",
+             getLLVMStyleWithColumns(20)));
+
+  // Break the first line, then reflow the beginning of the second and third
+  // line up.
+  EXPECT_EQ("// long long long\n"
+            "// lon1 lon2 lon2\n"
+            "// lon2 lon3 lon3",
+            format("// long long long lon1\n"
+                   "// lon2 lon2 lon2\n"
+                   "// lon3 lon3",
+                   getLLVMStyleWithColumns(20)));
+
+  // Shrink the first line, then reflow the second line up.
+  EXPECT_EQ("// long long long",
+            format("// long long\n"
+                   "// long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't shrink the leading whitespace of the content.
+  EXPECT_EQ("int i; /// a",
+            format("int i; /// a",
+                   getLLVMStyleWithColumns(20)));
+
+  // Shrink trailing whitespace if there is no postfix.
+  EXPECT_EQ("// long long long",
+            format("// long long long ", getLLVMStyleWithColumns(20)));
+
+  // Shrink trailing whitespace to a single space if there is a postfix.
+  EXPECT_EQ("/* long long long */",
+            format("/* long long long */", getLLVMStyleWithColumns(20)));
+
+  // Break a block comment postfix if exceeding the line limit.
+  EXPECT_EQ(
+      "/* long\n"
+      " */",
+      format("/* long */", getLLVMStyleWithColumns(20)));
+
+  // Reflow the beginning of the second line, then break the rest.
+  EXPECT_EQ(
+      "// long long long\n"
+      "// lon1 lon2 lon2\n"
+      "// lon2 lon2 lon2\n"
+      "// lon3",
+      format("// long long long lon1\n"
+             "// lon2 lon2 lon2 lon2 lon2 lon3",
+             getLLVMStyleWithColumns(20)));
+
+  // Reflow indented comments.
+  EXPECT_EQ(
+      "{\n"
+      " // long long long\n"
+      " // long long\n"
+      " int i; /* long lon\n"
+      " g long\n"
+      " */\n"
+      "}",
+      format("{\n"
+             " // long long long long\n"
+             " // long\n"
+             " int i; /* long lon g\n"
+             " long */\n"
+             "}", getLLVMStyleWithColumns(20)));
+
+  // Reflow lines after a broken line.
+  EXPECT_EQ("int a; // Trailing\n"
+            " // comment on\n"
+            " // 2 or 3\n"
+            " // lines.\n",
+            format("int a; // Trailing comment\n"
+                   " // on 2\n"
+                   " // or 3\n"
+                   " // lines.\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ(
+      "/// This long line\n"
+      "/// gets reflown.\n",
+      format("/// This long line gets\n"
+             "/// reflown.\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! This long line\n"
+            "//! gets reflown.\n",
+            format(" //! This long line gets\n"
+                   " //! reflown.\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* This long line\n"
+            " * gets reflown.\n"
+            " */\n",
+            format("/* This long line gets\n"
+                   " * reflown.\n"
+                   " */\n", getLLVMStyleWithColumns(20)));
+
+  // Reflow after indentation makes a line too long.
+  EXPECT_EQ("{\n"
+            " // long long long\n"
+            " // lo long\n"
+            "}\n",
+            format("{\n"
+                   "// long long long lo\n"
+                   "// long\n"
+                   "}\n", getLLVMStyleWithColumns(20)));
+
+  // Break and reflow multiple lines.
+  EXPECT_EQ(
+      "/*\n"
+      " * Reflow the end of\n"
+      " * line by 1 2 3 4.\n"
+      " */\n",
+      format("/*\n"
+             " * Reflow the end of line\n"
+             " * by\n"
+             " * 1\n"
+             " * 2\n"
+             " * 3\n"
+             " * 4.\n"
+             " */\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ(
+      "/// First line gets\n"
+      "/// broken. Second\n"
+      "/// line gets\n"
+      "/// reflown and\n"
+      "/// broken. Third\n"
+      "/// gets reflown.\n",
+      format(
+          "/// First line gets broken.\n"
+          "/// Second line gets reflown and broken.\n"
+          "/// Third gets reflown.\n",
+          getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("int i; // first long\n"
+            " // long snd\n"
+            " // long.\n",
+            format("int i; // first long long\n"
+                   " // snd long.\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            " // first long line\n"
+            " // line second\n"
+            " // long line line\n"
+            " // third long line\n"
+            " // line\n"
+            "}\n",
+            format("{\n"
+                   " // first long line line\n"
+                   " // second long line line\n"
+                   " // third long line line\n"
+                   "}\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ(
+      "int i; /* first line\n"
+      " * second\n"
+      " * line third\n"
+      " * line\n"
+      " */",
+      format("int i; /* first line\n"
+             " * second line\n"
+             " * third line\n"
+             " */", getLLVMStyleWithColumns(20)));
+
+  // Keep the block comment ending '*/' while reflowing.
+  EXPECT_EQ("/* Long long long\n"
+            " * line short */\n",
+            format("/* Long long long line\n"
+                   " * short */\n", getLLVMStyleWithColumns(20)));
+
+  // Don't reflow across blank lines or blank comment lines.
+  EXPECT_EQ("int i; // This long\n"
+            " // line gets\n"
+            " // broken.\n"
+            " //\n"
+            " // keep.\n",
+            format("int i; // This long line gets broken.\n"
+                   " // \n"
+                   " // keep.\n", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ(
+      "/* long long long\n"
+      " * long\n"
+      " *\n"
+      " * line */",
+      format("/* long long long long\n"
+             " *\n"
+             " * line */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "\n"
+            "// long",
+            format("// long long long long\n"
+                   "\n"
+                   "// long", getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between separate blocks of comments.
+  EXPECT_EQ("/* First comment\n"
+            " * block will */\n"
+            "/* Snd\n"
+            " */\n",
+            format("/* First comment block\n"
+                   " * will */\n"
+                   "/* Snd\n"
+                   " */\n", getLLVMStyleWithColumns(20)));
+
+  // Don't reflow lines having different indentation.
+  EXPECT_EQ(
+      "// long long long\n"
+      "// long\n"
+      "// long",
+      format("// long long long long\n"
+             "// long",
+             getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow after implicit string literals.
+  verifyFormat(
+      "#include // l l l\n"
+      " // l",
+      getLLVMStyleWithColumns(20));
+
+  // Don't break or reflow comments on import lines.
+  EXPECT_EQ(
+      "#include \"t\" /* l l l\n"
+      " * l */",
+      format(
+          "#include \"t\" /* l l l\n"
+          " * l */",
+          getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between different trailing comment sections.
+  EXPECT_EQ(
+      "int i; // long long\n"
+      " // long\n"
+      "int j; // long long\n"
+      " // long\n",
+      format(
+          "int i; // long long long\n"
+          "int j; // long long long\n",
+          getLLVMStyleWithColumns(20)));
+
+  // Don't reflow if the first word on the next line is longer than the
+  // available space on the current line.
+  EXPECT_EQ("int i; // trigger\n"
+            " // reflow\n"
+            " // longsec\n",
+            format("int i; // trigger reflow\n"
+                   " // longsec\n",
+                   getLLVMStyleWithColumns(20)));
+
+
+  // Keep empty comment lines.
+  EXPECT_EQ("/**/",
+            format(" /**/", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* */",
+            format(" /* */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* */",
+            format(" /* */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//",
+            format(" // ", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("///",
+            format(" /// ", getLLVMStyleWithColumns(20)));
+}
+
 TEST_F(FormatTest, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
             "}{)(&*(^%%#%@!
fsadj f;ldjs ,:;| <<<>>>][)(][\n" @@ -3065,7 +3436,7 @@ EXPECT_EQ( "int xxxxxxxxx; /* " "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\n" - "zzzzzz\n" + " zzzzzz\n" "0*/", format("int xxxxxxxxx; /* " "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy zzzzzz\n" @@ -6323,7 +6694,7 @@ "#include \"string.h\"\n" "#include \n" "#include < path with space >\n" - "#include_next " + "#include_next \n" "#include \"abc.h\" // this is included for ABC\n" "#include \"some long include\" // with a comment\n" "#include \"some very long include paaaaaaaaaaaaaaaaaaaaaaath\"", @@ -7081,7 +7452,7 @@ EXPECT_EQ("/*\n" "**\n" "* aaaaaa\n" - "*aaaaaa\n" + "* aaaaaa\n" "*/", format("/*\n" "**\n" @@ -7193,10 +7564,9 @@ "};", getLLVMStyleWithColumns(15))); - // FIXME: The formatting is still wrong here. EXPECT_EQ("a = {\n" " 1111 /* a\n" - " */\n" + " */\n" "};", format("a = {1111 /* a */\n" "};", @@ -11585,6 +11955,22 @@ " int j;\n" " /* clang-format on */\n" " int k;")); + EXPECT_EQ( + "// clang-format off\n" + "// long long long long long long line\n" + "/* clang-format on */\n" + "/* long long long\n" + " * long long long\n" + " * line */\n" + "int i; /* clang-format off */\n" + "/* long long long long long long line */\n", + format("// clang-format off\n" + "// long long long long long long line\n" + "/* clang-format on */\n" + "/* long long long long long long line */\n" + "int i; /* clang-format off */\n" + "/* long long long long long long line */\n", + getLLVMStyleWithColumns(20))); } TEST_F(FormatTest, DoNotCrashOnInvalidInput) {