Index: lib/Format/BreakableToken.h =================================================================== --- lib/Format/BreakableToken.h +++ lib/Format/BreakableToken.h @@ -97,13 +97,14 @@ /// \p Length can be set to StringRef::npos, which means "to the end of line". virtual unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const = 0; + StringRef::size_type Length, + bool Start = true) const = 0; /// \brief Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not /// violate \p ColumnLimit. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const = 0; /// \brief Emits the previously retrieved \p Split via \p Whitespaces. @@ -219,7 +220,8 @@ public: unsigned getLineCount() const override; unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + StringRef::size_type Length, + bool Start = true) const override; protected: BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, @@ -249,6 +251,7 @@ const FormatStyle &Style); Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; @@ -269,6 +272,7 @@ public: unsigned getLineCount() const override; Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; @@ -341,7 +345,8 @@ const FormatStyle &Style); unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + StringRef::size_type Length, + bool Start = true) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, @@ -417,7 +422,8 @@ const FormatStyle &Style); unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + StringRef::size_type Length, + bool Start = true) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, Index: lib/Format/BreakableToken.cpp =================================================================== --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -67,8 +67,12 @@ unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding) { - if (ColumnLimit <= ContentStartColumn + 1) + DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit + << "\"\n"); + if (ColumnLimit <= ContentStartColumn + 1) { + llvm::errs() << "here\n"; return BreakableToken::Split(StringRef::npos, 0); + } unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; unsigned MaxSplitBytes = 0; @@ -188,8 +192,8 @@ unsigned BreakableSingleLineToken::getLineLengthAfterSplit( unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - return StartColumn + Prefix.size() + Postfix.size() + + StringRef::size_type Length, bool Start) const { + return (Start ? (StartColumn + Prefix.size()) : 0) + Postfix.size() + encoding::columnWidthWithTabs(Line.substr(TailOffset, Length), StartColumn + Prefix.size(), Style.TabWidth, Encoding); @@ -215,10 +219,12 @@ BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const { return getStringSplit(Line.substr(TailOffset), - StartColumn + Prefix.size() + Postfix.size(), + ReflowColumn == 0 + ? StartColumn + Prefix.size() + Postfix.size() + : ReflowColumn + 1, ColumnLimit, Style.TabWidth, Encoding); } @@ -241,13 +247,15 @@ BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const { // Don't break lines matching the comment pragmas regex. if (CommentPragmasRegex.match(Content[LineIndex])) return Split(StringRef::npos, 0); return getCommentSplit(Content[LineIndex].substr(TailOffset), - getContentStartColumn(LineIndex, TailOffset), + ReflowColumn == 0 + ? getContentStartColumn(LineIndex, TailOffset) + : ReflowColumn + 1, ColumnLimit, Style.TabWidth, Encoding); } @@ -282,11 +290,14 @@ // otherwise we compute the largest piece of text that fits after // ReflowStartColumn. Split ReflowSplit = - FullWidth <= ColumnLimit - ? Split(TrimmedText.size(), Text.size() - TrimmedText.size()) - : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, - Style.TabWidth, Encoding); - +// FullWidth <= ColumnLimit + //? + Split(TrimmedText.size(), Text.size() - TrimmedText.size()) +// : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, +// Style.TabWidth, Encoding) + ; + + return ReflowSplit; // We need to be extra careful here, because while it's OK to keep a long line // if it can't be broken into smaller pieces (like when the first word of a // long line is longer than the column limit), it's not OK to reflow that long @@ -503,8 +514,9 @@ unsigned BreakableBlockComment::getLineLengthAfterSplit( unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); + StringRef::size_type Length, bool Start) const { + unsigned ContentStartColumn = + Start ? getContentStartColumn(LineIndex, TailOffset) : 0; unsigned LineLength = ContentStartColumn + encoding::columnWidthWithTabs( Content[LineIndex].substr(TailOffset, Length), @@ -555,6 +567,9 @@ llvm::Regex &CommentPragmasRegex) const { if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); + + return Split(0, 0); + /* StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); Split Result = getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, ColumnLimit); @@ -563,6 +578,7 @@ if (Result.first != StringRef::npos) Result.first += Content[LineIndex].size() - TrimmedContent.size(); return Result; + */ } unsigned @@ -583,26 +599,57 @@ unsigned BreakableBlockComment::getLineLengthAfterSplitBefore( unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, unsigned ColumnLimit, Split SplitBefore) const { - if (SplitBefore.first == StringRef::npos || - // Block comment line contents contain the trailing whitespace after the - // decoration, so the need of left trim. Note that this behavior is - // consistent with the breaking of block comments where the indentation of - // a broken line is uniform across all the lines of the block comment. - SplitBefore.first + SplitBefore.second < - Content[LineIndex].ltrim().size()) { - // A piece of line, not the whole, gets reflown. - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + // FIXME: pull together with getLineLegnthAfterSplit (same as for + // BreakableLineCommentSeciotn. + unsigned ContentStartColumn = 0; + if (SplitBefore.first == StringRef::npos) { + ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); } else { - // The whole line gets reflown, need to check if we need to insert a break - // for the postfix or not. - StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - unsigned ReflownColumn = - getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); - if (ReflownColumn <= ColumnLimit) { - return ReflownColumn; + // One space at the start. + ContentStartColumn = 1; + } + llvm::errs() << "Start: " << ContentStartColumn << "\n"; + StringRef Text = Content[LineIndex].substr(TailOffset, StringRef::npos); + if (SplitBefore.first != StringRef::npos) { + // If we reflow, we remove the space at the start of the text. + Text = Text.ltrim(Blanks); + } + unsigned LineLength = + ContentStartColumn + + encoding::columnWidthWithTabs( + Text, + ContentStartColumn, Style.TabWidth, Encoding); + llvm::errs() << "LL: " << LineLength << "\n"; + // The last line gets a "*/" postfix. + if (LineIndex + 1 == Lines.size()) { + LineLength += 2; + // We never need a decoration when breaking just the trailing "*/" postfix. + // Note that checking that Length == 0 is not enough, since Length could + // also be StringRef::npos. + if (Content[LineIndex].substr(TailOffset, StringRef::npos).empty()) { + LineLength -= Decoration.size(); } - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); } + return LineLength; +// if (SplitBefore.first == StringRef::npos || +// // Block comment line contents contain the trailing whitespace after the +// // decoration, so the need of left trim. Note that this behavior is +// // consistent with the breaking of block comments where the indentation of +// // a broken line is uniform across all the lines of the block comment. +// SplitBefore.first + SplitBefore.second < +// Content[LineIndex].ltrim().size()) { +// // A piece of line, not the whole, gets reflown. +// } else { +// // The whole line gets reflown, need to check if we need to insert a break +// // for the postfix or not. +// StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); +// unsigned ReflownColumn = +// getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); +// if (ReflownColumn <= ColumnLimit) { +// return ReflownColumn; +// } +// return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); +// } } bool BreakableBlockComment::introducesBreakBeforeToken() const { @@ -647,10 +694,10 @@ // Check if we need to also insert a break at the whitespace range. // Note that we don't need a penalty for this break, since it doesn't change // the total number of lines. - unsigned ReflownColumn = - getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); - if (ReflownColumn > ColumnLimit) - insertBreak(LineIndex, 0, SplitBefore, Whitespaces); +// unsigned ReflownColumn = +// getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); +// if (ReflownColumn > ColumnLimit) +// insertBreak(LineIndex, 0, SplitBefore, Whitespaces); return; } @@ -815,10 +862,13 @@ unsigned BreakableLineCommentSection::getLineLengthAfterSplit( unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - unsigned ContentStartColumn = - (TailOffset == 0 ? ContentColumn[LineIndex] - : OriginalContentColumn[LineIndex]); + StringRef::size_type Length, bool Start) const { + unsigned ContentStartColumn = 0; + if (Start) + ContentStartColumn = (TailOffset == 0 ? ContentColumn[LineIndex] + : OriginalContentColumn[LineIndex]); + llvm::errs() << "Start: " << ContentStartColumn << ", Text: \"" + << Content[LineIndex].substr(TailOffset, Length) << "\n"; return ContentStartColumn + encoding::columnWidthWithTabs( Content[LineIndex].substr(TailOffset, Length), ContentStartColumn, Style.TabWidth, Encoding); @@ -850,24 +900,47 @@ llvm::Regex &CommentPragmasRegex) const { if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); - return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, - ColumnLimit); + + // In a line comment section each line is a separate token; thus, after a + // split we replace all whitespace before the current line comment token + // (which does not need to be included in the split), plus the start of the + // line up to where the content starts. + return Split(0, 0); +// return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, +// ColumnLimit); } unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore( unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, unsigned ColumnLimit, Split SplitBefore) const { - if (SplitBefore.first == StringRef::npos || - SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { - // A piece of line, not the whole line, gets reflown. - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + // FIXME: Pull together with getLineLengthAfterSplit, as it's really the same + // thing. + unsigned ContentStartColumn = 0; + if (SplitBefore.first == StringRef::npos) { + ContentStartColumn = (TailOffset == 0 ? ContentColumn[LineIndex] + : OriginalContentColumn[LineIndex]); } else { - // The whole line gets reflown. - unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); - return StartColumn + - encoding::columnWidthWithTabs(Content[LineIndex], StartColumn, - Style.TabWidth, Encoding); + // One space at the start. + ContentStartColumn = 1; } + return ContentStartColumn + + encoding::columnWidthWithTabs( + Content[LineIndex].substr(TailOffset, StringRef::npos), + ContentStartColumn, Style.TabWidth, Encoding); + + /* + if (SplitBefore.first == StringRef::npos || + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + // A piece of line, not the whole line, gets reflown. + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } else { + // The whole line gets reflown. + unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); + return StartColumn + + encoding::columnWidthWithTabs(Content[LineIndex], StartColumn, + Style.TabWidth, Encoding); + } + */ } void BreakableLineCommentSection::replaceWhitespaceBefore( @@ -880,7 +953,9 @@ // // line 1 \ // // line 2 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { + llvm::errs() << "h1\n"; if (SplitBefore.first != StringRef::npos) { + llvm::errs() << "h2\n"; // Reflow happens between tokens. Replace the whitespace between the // tokens by the empty string. Whitespaces.replaceWhitespace( @@ -933,10 +1008,11 @@ // Add a break after a reflow split has been introduced, if necessary. // Note that this break doesn't need to be penalized, since it doesn't change // the number of lines. - if (SplitBefore.first != StringRef::npos && - SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { - insertBreak(LineIndex, 0, SplitBefore, Whitespaces); - } +// if (SplitBefore.first != StringRef::npos && +// SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { +// llvm::errs() << "HUH!!!\n"; +// insertBreak(LineIndex, 0, SplitBefore, Whitespaces); +// } } void BreakableLineCommentSection::updateNextToken(LineState &State) const { Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1490,6 +1490,8 @@ unsigned NewBreakPenalty = Current.isStringLiteral() ? Style.PenaltyBreakString : Style.PenaltyBreakComment; + llvm::errs() << "UNBREAKABLE: " << Current.UnbreakableTailLength << "\n"; + //unsigned RemainingSpace = ColumnLimit; unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; bool BreakInserted = Token->introducesBreakBeforeToken(); // Store whether we inserted a new line break at the end of the previous @@ -1498,128 +1500,213 @@ // We use a conservative reflowing strategy. Reflow starts after a line is // broken or the corresponding whitespace compressed. Reflow ends as soon as a // line that doesn't get reflown with the previous line is reached. - bool ReflowInProgress = false; + bool TryReflow = false; unsigned Penalty = 0; unsigned RemainingTokenColumns = 0; unsigned TailOffset = 0; + unsigned ReflownTextLength = 0; DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn << ".\n"); for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { DEBUG(llvm::dbgs() << " Line: " << LineIndex - << " (Reflow: " << ReflowInProgress << ")\n"); + << " (TryReflow: " << TryReflow << ")\n"); BreakableToken::Split SplitBefore(StringRef::npos, 0); - if (ReflowInProgress) { + // Whether we did a reflow into the current line. + bool Reflown = false; + if (TryReflow) { + ReflownTextLength += RemainingTokenColumns; + DEBUG(llvm::dbgs() << " Size of reflown text: " + << ReflownTextLength + << "\n Potential reflow split: "); + TryReflow = false; SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, CommentPragmasRegex); + ColumnLimit, CommentPragmasRegex); + if (SplitBefore.first != StringRef::npos) { + DEBUG(llvm::dbgs() << SplitBefore.first << ", " << SplitBefore.second + << "\n"); + TailOffset = SplitBefore.first + SplitBefore.second; + llvm::errs() << "1: " << RemainingTokenColumns << "\n"; + RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( + LineIndex, TailOffset, RemainingTokenColumns, RemainingSpace, + SplitBefore); + llvm::errs() << "2: " << RemainingTokenColumns << "\n"; + Reflown = true; + if (ReflownTextLength + RemainingTokenColumns > RemainingSpace) { + DEBUG(llvm::dbgs() << " Over limit after reflow, need: " + << (ReflownTextLength + RemainingTokenColumns) + << ", space: " << RemainingSpace + << ", reflown prefix: " << ReflownTextLength + << ", offset in line: " << TailOffset << "\n"); + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, ColumnLimit, + ReflownTextLength, CommentPragmasRegex); + if (Split.first == StringRef::npos) { + llvm::errs() << " Did not find later break...\n"; + Reflown = false; + } else { + // FIXME: Count actual length instead of bytes. + // FIXME: Compress whitespace? + if (ReflownTextLength + Split.first > ColumnLimit) { + DEBUG(llvm::dbgs() << " Next split protrudes, need: " + << (ReflownTextLength + Split.first) + << ", space: " << ColumnLimit); + unsigned ExcessCharactersPenalty = + (ReflownTextLength + Split.first - ColumnLimit) * + Style.PenaltyExcessCharacter; + if (NewBreakPenalty < ExcessCharactersPenalty) { + Reflown = false; + } + } + } + } + } else { + DEBUG(llvm::dbgs() << "not found.\n"); + } } - ReflowInProgress = SplitBefore.first != StringRef::npos; - DEBUG({ - if (ReflowInProgress) - llvm::dbgs() << " Reflowing.\n"; - }); - TailOffset = - ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; - // If we found a reflow split and have added a new break before this line, - // we are going to remove the line break at the start of the next logical - // line. - // For example, here we'll add a new line break after 'text', and - // subsequently delete the line break between 'that' and 'reflows'. - // // some text that - // // reflows - // -> - // // some text - // // that reflows - // When adding the line break, we also added the penalty for it, so we need - // to subtract that penalty again when we remove the line break due to - // reflowing. - if (ReflowInProgress && NewBreakBefore) { - assert(Penalty >= NewBreakPenalty); - Penalty -= NewBreakPenalty; + if (Reflown) { + // If we found a reflow split and have added a new break before this + // line, we are going to remove the line break at the start of the next + // logical line. For example, here we'll add a new line break after + // 'text', and subsequently delete the line break between 'that' and + // 'reflows'. + // // some text that + // // reflows + // -> + // // some text + // // that reflows + // When adding the line break, we also added the penalty for it, so we + // need to subtract that penalty again when we remove the line break due + // to reflowing. + if (NewBreakBefore) { + assert(Penalty >= NewBreakPenalty); + Penalty -= NewBreakPenalty; + } + TryReflow = true; + if (!DryRun) + Token->replaceWhitespaceBefore( + LineIndex, ReflownTextLength + RemainingTokenColumns, + RemainingSpace, SplitBefore, Whitespaces); + } else { + // If we didn't reflow into this line, the only space to consider is the + // current logical line. + RemainingTokenColumns = 0; + ReflownTextLength = 0; + TailOffset = 0; + SplitBefore = BreakableToken::Split(StringRef::npos, 0); + RemainingTokenColumns += Token->getLineLengthAfterSplitBefore( + LineIndex, TailOffset, RemainingTokenColumns, RemainingSpace, + SplitBefore); + // Adapt the start of the token, for example indent. + if (!DryRun) + Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, SplitBefore, + Whitespaces); } NewBreakBefore = false; - if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, SplitBefore, Whitespaces); - RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( - LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); - while (RemainingTokenColumns > RemainingSpace) { - DEBUG(llvm::dbgs() << " Over limit, need: " << RemainingTokenColumns - << ", space: " << RemainingSpace << "\n"); - BreakableToken::Split Split = Token->getSplit( - LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); + + llvm::errs() << "RTC: " << RemainingTokenColumns + << ", Space: " << RemainingSpace << "\n"; + while (ReflownTextLength + RemainingTokenColumns > RemainingSpace) { + DEBUG(llvm::dbgs() << " Over limit, need: " + << (ReflownTextLength + RemainingTokenColumns) + << ", space: " << RemainingSpace + << ", reflown prefix: " << ReflownTextLength + << ", offset in line: " << TailOffset << "\n"); + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, ColumnLimit, + ReflownTextLength, CommentPragmasRegex); if (Split.first == StringRef::npos) { // The last line's penalty is handled in addNextStateToQueue(). if (LineIndex < EndIndex - 1) - Penalty += Style.PenaltyExcessCharacter * - (RemainingTokenColumns - RemainingSpace); + Penalty += + Style.PenaltyExcessCharacter * + (ReflownTextLength + RemainingTokenColumns - ColumnLimit); DEBUG(llvm::dbgs() << " No break opportunity.\n"); break; } assert(Split.first != 0); - // Check if compressing the whitespace range will bring the line length - // under the limit. If that is the case, we perform whitespace compression - // instead of inserting a line break. - unsigned RemainingTokenColumnsAfterCompression = - Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); - if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { - RemainingTokenColumns = RemainingTokenColumnsAfterCompression; - ReflowInProgress = true; - if (!DryRun) - Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); - DEBUG(llvm::dbgs() << " Compressing below limit.\n"); - break; + unsigned ToSplitColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset, Split.first, /*Start=*/ReflownTextLength == 0); + + // FIXME: Do we need +1 here for the space from ReflownTextLength?? + BreakableToken::Split NextSplit(StringRef::npos, 0); + if (!Current.isStringLiteral()) { + llvm::errs() << "HEEEEEEEEEEEEEEEEEEEEEEERE\n\n"; + NextSplit = Token->getSplit( + LineIndex, TailOffset + Split.first + Split.second, ColumnLimit, + ReflownTextLength + ToSplitColumns, CommentPragmasRegex); } - // Compute both the penalties for: - // - not breaking, and leaving excess characters - // - adding a new line break - assert(RemainingTokenColumnsAfterCompression > RemainingSpace); - unsigned ExcessCharactersPenalty = - (RemainingTokenColumnsAfterCompression - RemainingSpace) * - Style.PenaltyExcessCharacter; - - unsigned BreakPenalty = NewBreakPenalty; - unsigned ColumnsUsed = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); - if (ColumnsUsed > ColumnLimit) - BreakPenalty += - Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); - - DEBUG(llvm::dbgs() << " Penalty excess: " << ExcessCharactersPenalty - << "\n break : " << BreakPenalty << "\n"); - // Only continue to add the line break if the penalty of the excess - // characters is larger than the penalty of the line break. - // FIXME: This does not take into account when we can later remove the - // line break again due to a reflow. - if (ExcessCharactersPenalty < BreakPenalty) { + // We don't break if the next split fits, possibly with compression. + unsigned ToNextSplitColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset, + NextSplit.first != StringRef::npos + ? Split.first + Split.second + NextSplit.first + : StringRef::npos, + /*Start=*/ReflownTextLength == 0); + llvm::errs() << "ToNextSplitColumns: " << ToNextSplitColumns << "\n"; + ToNextSplitColumns = + Token->getLineLengthAfterCompression(ToNextSplitColumns, Split); + llvm::errs() << "ToNextSplitColumns: " << ToNextSplitColumns << "\n"; + if (ReflownTextLength != 0) + ToNextSplitColumns += 1; + // FIXME: If NextSplit is npos we need to count against RemainingSpace! + bool ContinueOnLine = + ReflownTextLength + ToNextSplitColumns <= ColumnLimit; + unsigned ExcessCharactersPenalty = 0; + if (!ContinueOnLine) { + ExcessCharactersPenalty = + (ReflownTextLength + ToNextSplitColumns - ColumnLimit) * + Style.PenaltyExcessCharacter; + DEBUG(llvm::dbgs() << " Penalty excess: " << ExcessCharactersPenalty + << "\n break : " << NewBreakPenalty + << "\n"); + if (ExcessCharactersPenalty < NewBreakPenalty) { + ContinueOnLine = true; + } + } + if (ContinueOnLine) { + DEBUG(llvm::dbgs() << "Continuing on line...\n"); + TryReflow = true; if (!DryRun) Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); - // Do not set ReflowInProgress: we do not have any space left to - // reflow into. + ReflownTextLength += ToSplitColumns + 1; Penalty += ExcessCharactersPenalty; - break; + TailOffset += Split.first + Split.second; + RemainingTokenColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset, StringRef::npos, /*Start=*/false); + continue; } + DEBUG(llvm::dbgs() << "Breaking...\n"); unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + LineIndex, TailOffset + Split.first + Split.second, StringRef::npos, + /*Start=*/true); // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) + if (NewRemainingTokenColumns == RemainingTokenColumns) { + llvm::errs() << "HUH??\n"; // FIXME: Do we need to adjust the penalty? break; - assert(NewRemainingTokenColumns < RemainingTokenColumns); + } + llvm::errs() << "New: " << NewRemainingTokenColumns << ", Old: " << RemainingTokenColumns << "\n"; + assert(NewRemainingTokenColumns < + ReflownTextLength + RemainingTokenColumns); + DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first + << ", " << Split.second << "\n"); if (!DryRun) Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); - Penalty += BreakPenalty; + ReflownTextLength = 0; + Penalty += NewBreakPenalty; TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; - ReflowInProgress = true; + TryReflow = true; BreakInserted = true; NewBreakBefore = true; } Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -9922,16 +9922,9 @@ Style.PenaltyExcessCharacter = 90; verifyFormat("int a; // the comment", Style); - EXPECT_EQ("int a; // the\n" - " // comment aa", + EXPECT_EQ("int a; // the comment\n" + " // aa", format("int a; // the comment aa", Style)); - EXPECT_EQ("int a; // first line\n" - " // second line\n" - " // third line", - format("int a; // first line\n" - " // second line\n" - " // third line", - Style)); EXPECT_EQ("int a; /* first line\n" " * second line\n" " * third line\n" @@ -9941,12 +9934,18 @@ " * third line\n" " */", Style)); + EXPECT_EQ("int a; // first line\n" + " // second line\n" + " // third line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); // FIXME: Investigate why this is not getting the same layout as the test // above. EXPECT_EQ("int a; /* first line\n" - " * second\n" - " * line third\n" - " * line\n" + " * second line\n" + " * third line\n" " */", format("int a; /* first line second line third line" "\n*/", @@ -9965,31 +9964,23 @@ // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the // next one. - EXPECT_EQ("// foo bar baz\n" - "// bazfoo bar foo\n" - "// bar\n", + EXPECT_EQ("// foo bar baz bazfoo\n" + "// bar foo bar\n", format("// foo bar baz bazfoo bar\n" "// foo bar\n", Style)); EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar baz\n" - "// bazfoo bar foo\n" - "// bar\n", + "// foo bar baz bazfoo\n" + "// bar foo bar\n", format("// foo bar baz bazfoo\n" "// foo bar baz bazfoo bar\n" "// foo bar\n", Style)); - // FIXME: Optimally, we'd keep 'bar' in the last line at the end of the line, - // as it does not actually protrude far enough to make breaking pay off. - // Unfortunately, due to how reflowing is currently implemented, we already - // check the column limit after the reflowing decision and extend the reflow - // range, so we will not take whitespace compression into account. EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar baz\n" - "// bazfoo bar foo\n" - "// bar\n", + "// foo bar baz bazfoo\n" + "// bar foo bar\n", format("// foo bar baz bazfoo\n" "// foo bar baz bazfoo bar\n" "// foo bar\n", @@ -10630,27 +10621,48 @@ } TEST_F(FormatTest, SplitsUTF8BlockComments) { - EXPECT_EQ("/* Гляжу,\n" - " * поднимается\n" - " * медленно в\n" - " * гору\n" - " * Лошадка,\n" - " * везущая\n" - " * хворосту\n" - " * воз. */", - format("/* Гляжу, поднимается медленно в гору\n" - " * Лошадка, везущая хворосту воз. */", +// EXPECT_EQ("/* Гляжу,\n" +// " * поднимается\n" +// " * медленно в\n" +// " * гору\n" +// " * Лошадка,\n" +// " * везущая\n" +// " * хворосту\n" +// " * воз. */", +// format("/* Гляжу, поднимается медленно в гору\n" +// " * Лошадка, везущая хворосту воз. */", +// getLLVMStyleWithColumns(13))); + EXPECT_EQ("/* aaaaa,\n" + " * bbbbbbbbbbb\n" + " * cccccccc d\n" + " * eeee\n" + " * fffffff,\n" + " * ggggggg\n" + " * hhhhhhhh\n" + " * iii. */", + format("/* aaaaa, bbbbbbbbbbb cccccccc d eeee\n" + " * fffffff, ggggggg hhhhhhhh iii. */", getLLVMStyleWithColumns(13))); - EXPECT_EQ( - "/* 一二三\n" - " * 四五六七\n" - " * 八 九\n" - " * 十 */", - format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(9))); - EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n" - " * 𝕓𝕪𝕥𝕖\n" - " * 𝖀𝕿𝕱-𝟠 */", - format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12))); +// EXPECT_EQ("/* Гляжу,\n" +// " * поднимается\n" +// " * медленно в\n" +// " * гору Лошадка,\n" +// " * везущая\n" +// " * хворосту\n" +// " * воз. */", +// format("/* Гляжу, поднимается медленно в гору\n" +// " * Лошадка, везущая хворосту воз. */", +// getLLVMStyleWithColumns(13))); +// EXPECT_EQ( +// "/* 一二三\n" +// " * 四五六七\n" +// " * 八 九\n" +// " * 十 */", +// format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(9))); +// EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n" +// " * 𝕓𝕪𝕥𝕖\n" +// " * 𝖀𝕿𝕱-𝟠 */", +// format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12))); } #endif // _MSC_VER Index: unittests/Format/FormatTestComments.cpp =================================================================== --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -2102,6 +2102,21 @@ EXPECT_EQ("///", format(" /// ", getLLVMStyleWithColumns(20))); } +TEST_F(FormatTestComments, ReflowTODO) { + EXPECT_EQ("// some text\n" + "// that re flows\n", + format("// some text that\n" + "// re flows\n", + getLLVMStyleWithColumns(16))); + EXPECT_EQ("// some text\n" + "// that re flows\n", + format("// some text that\n" + "// re flows\n", + getLLVMStyleWithColumns(16))); +// some text that +// that re flows +} + TEST_F(FormatTestComments, IgnoresIf0Contents) { EXPECT_EQ("#if 0\n" "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n" @@ -2484,6 +2499,7 @@ " long */\n" " b);", format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16))); + EXPECT_EQ( "a = f(\n" " a,\n" @@ -2888,7 +2904,7 @@ getLLVMStyleWithColumns(20))); } -TEST_F(FormatTestComments, NoCrush_Bug34236) { +TEST_F(FormatTestComments, NoCrash_Bug34236) { // This is a test case from a crasher reported in: // https://bugs.llvm.org/show_bug.cgi?id=34236 // Temporarily disable formatting for readability. @@ -2896,8 +2912,7 @@ EXPECT_EQ( "/* */ /*\n" " * a\n" -" * b c\n" -" * d*/", +" * b c d*/", format( "/* */ /*\n" " * a b\n"