Index: include/clang/Format/Format.h =================================================================== --- include/clang/Format/Format.h +++ include/clang/Format/Format.h @@ -1273,6 +1273,13 @@ /// \brief Pointer and reference alignment style. PointerAlignmentStyle PointerAlignment; + struct RawStringFormat { + std::string Delimiter; + LanguageKind Language; + }; + + std::vector RawStringFormats; + /// \brief If ``true``, clang-format will attempt to re-flow comments. /// \code /// false: @@ -1650,6 +1657,13 @@ unsigned Line = 0; }; +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + unsigned FirstStartColumn, + unsigned NextStartColumn, + StringRef FileName, + FormattingAttemptStatus *Status); + /// \brief Reformats the given \p Ranges in \p Code. /// /// Each range is extended on either end to its next bigger logic unit, i.e. Index: lib/Format/ContinuationIndenter.h =================================================================== --- lib/Format/ContinuationIndenter.h +++ lib/Format/ContinuationIndenter.h @@ -30,8 +30,29 @@ struct FormatToken; struct LineState; struct ParenState; +struct RawStringFormatStyleManager; class WhitespaceManager; +struct RawStringFormatStyleManager { + llvm::StringMap DelimiterStyle; + + RawStringFormatStyleManager(const FormatStyle &CodeStyle) { + for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { + FormatStyle Style = getGoogleStyle(RawStringFormat.Language); + Style.ColumnLimit = CodeStyle.ColumnLimit; + DelimiterStyle.insert({RawStringFormat.Delimiter, Style}); + } + } + + llvm::Optional get(StringRef Delimiter) const { + std::string LowerDelimiter = Delimiter.lower(); + auto It = DelimiterStyle.find(LowerDelimiter); + if (It == DelimiterStyle.end()) + return None; + return It->second; + } +}; + class ContinuationIndenter { public: /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in @@ -45,8 +66,8 @@ /// \brief Get the initial state, i.e. 
the state after placing \p Line's /// first token at \p FirstIndent. - LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, - bool DryRun); + LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, + const AnnotatedLine *Line, bool DryRun); // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a // better home. @@ -88,15 +109,19 @@ /// \brief Update 'State' with the next token opening a nested block. void moveStateToNewBlock(LineState &State); + void reformatRawStringLiteral(const FormatToken &Current, LineState &State, + StringRef Delimiter, + const FormatStyle &RawStringStyle, bool DryRun); + /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// /// \returns An extra penalty if a token was broken, otherwise 0. /// - /// The returned penalty will cover the cost of the additional line breaks and - /// column limit violation in all lines except for the last one. The penalty - /// for the column limit violation in the last line (and in single line - /// tokens) is handled in \c addNextStateToQueue. + /// The returned penalty will cover the cost of the additional line breaks + /// and column limit violation in all lines except for the last one. The + /// penalty for the column limit violation in the last line (and in single + /// line tokens) is handled in \c addNextStateToQueue. 
unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun); @@ -143,6 +168,7 @@ encoding::Encoding Encoding; bool BinPackInconclusiveFunctions; llvm::Regex CommentPragmasRegex; + const RawStringFormatStyleManager RawStringFormats; }; struct ParenState { Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -76,6 +76,31 @@ (LessTok.Previous && LessTok.Previous->is(tok::equal)))); } +// Returns the delimiter of a raw string literal, or None if TokenText is not +// the text of a raw string literal. +// For example, the delimiter of R"deli(cont)deli" is deli. +static llvm::Optional getRawStringDelimiter(StringRef TokenText) { + if (TokenText.size() < 5 // The smallest raw string is R"()" + || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) + return None; + + // A raw string starts with 'R"delimiter(', where the delimiter is ASCII + // and has size at most 16 by the standard, so the first '(' must be among + // the first 19 bytes. 
+ size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); + if (LParenPos == StringRef::npos) + return None; + StringRef Delimiter = TokenText.substr(2, LParenPos - 2); + + // Check that the string ends in ')Delimiter"' + size_t RParenPos = TokenText.size() - Delimiter.size() - 2; + if (TokenText[RParenPos] != ')') + return None; + if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) + return None; + return Delimiter; +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -85,14 +110,15 @@ : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), - CommentPragmasRegex(Style.CommentPragmas) {} + CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {} LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, + unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun) { LineState State; State.FirstIndent = FirstIndent; - State.Column = FirstIndent; + State.Column = FirstStartColumn ? 
FirstStartColumn : FirstIndent; State.Line = Line; State.NextToken = Line->First; State.Stack.push_back(ParenState(FirstIndent, FirstIndent, @@ -940,7 +966,21 @@ !Current.isStringLiteral()) State.StartOfStringLiteral = 0; - State.Column += Current.ColumnWidth; + bool AddColumnWidth = true; + if (Current.isStringLiteral()) { + if (llvm::Optional Delimiter = + getRawStringDelimiter(Current.TokenText)) { + if (llvm::Optional RawStringStyle = + RawStringFormats.get(*Delimiter)) { + reformatRawStringLiteral(Current, State, *Delimiter, *RawStringStyle, + DryRun); + AddColumnWidth = false; + } + } + } + + if (AddColumnWidth) + State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; unsigned Penalty = 0; if (CanBreakProtrudingToken) @@ -1200,6 +1240,65 @@ State.Stack.back().BreakBeforeParameter = true; } +static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, + unsigned TabWidth, + encoding::Encoding Encoding) { + size_t LastNewlinePos = Text.find_last_of("\n"); + if (LastNewlinePos == StringRef::npos) { + return StartColumn + + encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding); + } else { + return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos), + /*StartColumn=*/0, TabWidth, Encoding); + } +} + +void ContinuationIndenter::reformatRawStringLiteral( + const FormatToken &Current, LineState &State, StringRef Delimiter, + const FormatStyle &RawStringStyle, bool DryRun) { + if (DryRun) + return; + // The text of a raw string is between the leading 'R"delimiter(' and the + // trailing ')delimiter"'. + size_t PrefixSize = 3 + Delimiter.size(); + size_t SuffixSize = 2 + Delimiter.size(); + std::string RawText = + Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); + + // The first start column is the column where the raw text starts. + unsigned FirstStartColumn = State.Column + PrefixSize; + // The next start column is the intended indentation of a line break inside + // the raw string at level 0. 
+ unsigned NextStartColumn = State.FirstIndent + Style.ContinuationIndentWidth; + + tooling::Replacements Fixes = + reformat(RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, + FirstStartColumn, NextStartColumn, "", + /*FormattingAttemptStatus=*/nullptr); + + auto NewCode = applyAllReplacements(RawText, Fixes); + if (!NewCode) + return; + + unsigned LastLineEndColumn = getLastLineEndColumn(*NewCode, FirstStartColumn, + Style.TabWidth, Encoding); + + State.Column = LastLineEndColumn + SuffixSize; + + SourceLocation OriginLoc = + Current.Tok.getLocation().getLocWithOffset(PrefixSize); + + for (const tooling::Replacement &Fix : Fixes) { + auto Err = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), Fix.getLength(), + Fix.getReplacementText())); + if (Err) { + llvm::errs() << "Failed to reformat raw string: " + << llvm::toString(std::move(Err)) << "\n"; + } + } +} + unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, LineState &State) { if (!Current.IsMultiline) Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -44,7 +44,8 @@ using clang::format::FormatStyle; -LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory); +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat); namespace llvm { namespace yaml { @@ -427,6 +428,13 @@ } }; +template <> struct MappingTraits { + static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) { + IO.mapOptional("Delimiter", Format.Delimiter); + IO.mapOptional("Language", Format.Language); + } +}; + // Allows to read vector while keeping default values. // IO.getContext() should contain a pointer to the FormatStyle structure, that // will be used to get default values for missing keys. 
@@ -604,6 +612,8 @@ LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; + LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto}, + {"proto", FormatStyle::LK_TextProto}}; LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; @@ -989,7 +999,11 @@ BinPackInconclusiveFunctions); UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), Env.getSourceManager(), Status) - .format(AnnotatedLines); + .format(AnnotatedLines, /*DryRun=*/false, + /*AdditionalIndent=*/0, + /*FixBadIndentation=*/false, + /*FirstStartColumn=*/Env.getFirstStartColumn(), + /*NextStartColumn=*/Env.getNextStartColumn()); for (const auto &R : Whitespaces.generateReplacements()) if (Result.add(R)) return Result; @@ -1889,6 +1903,8 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, + unsigned FirstStartColumn, + unsigned NextStartColumn, StringRef FileName, FormattingAttemptStatus *Status) { FormatStyle Expanded = expandPresets(Style); @@ -1923,8 +1939,8 @@ return Formatter(Env, Expanded, Status).process(); }); - std::unique_ptr Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + std::unique_ptr Env = Environment::CreateVirtualEnvironment( + Code, FileName, Ranges, FirstStartColumn, NextStartColumn); llvm::Optional CurrentCode = None; tooling::Replacements Fixes; for (size_t I = 0, E = Passes.size(); I < E; ++I) { @@ -1937,7 +1953,8 @@ CurrentCode = std::move(*NewCode); Env = Environment::CreateVirtualEnvironment( *CurrentCode, FileName, - tooling::calculateRangesAfterReplacements(Fixes, Ranges)); + tooling::calculateRangesAfterReplacements(Fixes, Ranges), + FirstStartColumn, NextStartColumn); } } } @@ -1945,6 +1962,15 @@ return Fixes; } +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + StringRef FileName, + FormattingAttemptStatus 
*Status) { + return reformat(Style, Code, Ranges, + /*FirstStartColumn=*/0, + /*NextStartColumn=*/0, FileName, Status); +} + tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, StringRef FileName) { Index: lib/Format/FormatTokenLexer.h =================================================================== --- lib/Format/FormatTokenLexer.h +++ lib/Format/FormatTokenLexer.h @@ -36,7 +36,7 @@ class FormatTokenLexer { public: - FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, + FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding); ArrayRef lex(); Index: lib/Format/FormatTokenLexer.cpp =================================================================== --- lib/Format/FormatTokenLexer.cpp +++ lib/Format/FormatTokenLexer.cpp @@ -24,10 +24,10 @@ namespace format { FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, - const FormatStyle &Style, + unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), Index: lib/Format/TokenAnalyzer.h =================================================================== --- lib/Format/TokenAnalyzer.h +++ lib/Format/TokenAnalyzer.h @@ -37,21 +37,30 @@ class Environment { public: Environment(SourceManager &SM, FileID ID, ArrayRef Ranges) - : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {} + : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM), + FirstStartColumn(0), + NextStartColumn(0) {} Environment(FileID ID, std::unique_ptr FileMgr, std::unique_ptr VirtualSM, 
std::unique_ptr Diagnostics, - const std::vector &CharRanges) + const std::vector &CharRanges, + unsigned FirstStartColumn, + unsigned NextStartColumn) : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()), - SM(*VirtualSM), FileMgr(std::move(FileMgr)), + SM(*VirtualSM), + FirstStartColumn(FirstStartColumn), + NextStartColumn(NextStartColumn), + FileMgr(std::move(FileMgr)), VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {} // This sets up an virtual file system with file \p FileName containing \p // Code. static std::unique_ptr CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef Ranges); + ArrayRef Ranges, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0); FileID getFileID() const { return ID; } @@ -59,10 +68,16 @@ const SourceManager &getSourceManager() const { return SM; } + unsigned getFirstStartColumn() const { return FirstStartColumn; } + + unsigned getNextStartColumn() const { return NextStartColumn; } + private: FileID ID; SmallVector CharRanges; SourceManager &SM; + unsigned FirstStartColumn; + unsigned NextStartColumn; // The order of these fields are important - they should be in the same order // as they are created in `CreateVirtualEnvironment` so that they can be Index: lib/Format/TokenAnalyzer.cpp =================================================================== --- lib/Format/TokenAnalyzer.cpp +++ lib/Format/TokenAnalyzer.cpp @@ -38,7 +38,9 @@ // Code. std::unique_ptr Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef Ranges) { + ArrayRef Ranges, + unsigned FirstStartColumn, + unsigned NextStartColumn) { // This is referenced by `FileMgr` and will be released by `FileMgr` when it // is deleted. 
IntrusiveRefCntPtr InMemoryFileSystem( @@ -69,9 +71,9 @@ SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return llvm::make_unique(ID, std::move(FileMgr), - std::move(VirtualSM), - std::move(Diagnostics), CharRanges); + return llvm::make_unique( + ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics), + CharRanges, FirstStartColumn, NextStartColumn); } TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) @@ -90,10 +92,11 @@ tooling::Replacements TokenAnalyzer::process() { tooling::Replacements Result; - FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style, - Encoding); + FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), + Env.getFirstStartColumn(), Style, Encoding); - UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this); + UnwrappedLineParser Parser(Style, Tokens.getKeywords(), + Env.getFirstStartColumn(), Tokens.lex(), *this); Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { Index: lib/Format/TokenAnnotator.h =================================================================== --- lib/Format/TokenAnnotator.h +++ lib/Format/TokenAnnotator.h @@ -43,7 +43,8 @@ InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), - LeadingEmptyLinesAffected(false), ChildrenAffected(false) { + LeadingEmptyLinesAffected(false), ChildrenAffected(false), + FirstStartColumn(Line.FirstStartColumn) { assert(!Line.Tokens.empty()); // Calculate Next and Previous for all tokens. Note that we must overwrite @@ -127,6 +128,8 @@ /// \c True if one of this line's children intersects with an input range. bool ChildrenAffected; + unsigned FirstStartColumn; + private: // Disallow copying. 
AnnotatedLine(const AnnotatedLine &) = delete; Index: lib/Format/TokenAnnotator.cpp =================================================================== --- lib/Format/TokenAnnotator.cpp +++ lib/Format/TokenAnnotator.cpp @@ -1872,7 +1872,8 @@ } Line.First->TotalLength = - Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; + Line.First->IsMultiline ? Style.ColumnLimit + : Line.FirstStartColumn + Line.First->ColumnWidth; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current) { Index: lib/Format/UnwrappedLineFormatter.h =================================================================== --- lib/Format/UnwrappedLineFormatter.h +++ lib/Format/UnwrappedLineFormatter.h @@ -41,7 +41,9 @@ /// \brief Format the current block and return the penalty. unsigned format(const SmallVectorImpl &Lines, bool DryRun = false, int AdditionalIndent = 0, - bool FixBadIndentation = false); + bool FixBadIndentation = false, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0); private: /// \brief Add a new line and the required indent before the first Token Index: lib/Format/UnwrappedLineFormatter.cpp =================================================================== --- lib/Format/UnwrappedLineFormatter.cpp +++ lib/Format/UnwrappedLineFormatter.cpp @@ -574,7 +574,9 @@ /// \brief Formats an \c AnnotatedLine and returns the penalty. /// /// If \p DryRun is \c false, directly applies the changes. 
- virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + virtual unsigned formatLine(const AnnotatedLine &Line, + unsigned FirstIndent, + unsigned FirstStartColumn, bool DryRun) = 0; protected: @@ -645,7 +647,8 @@ *Child->First, /*Newlines=*/0, /*Spaces=*/1, /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); } - Penalty += formatLine(*Child, State.Column + 1, DryRun); + Penalty += + formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun); State.Column += 1 + Child->Last->TotalLength; return true; @@ -671,10 +674,10 @@ /// \brief Formats the line, simply keeping all of the input's line breaking /// decisions. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { assert(!DryRun); - LineState State = - Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false); + LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn, + &Line, /*DryRun=*/false); while (State.NextToken) { bool Newline = Indenter->mustBreak(State) || @@ -697,9 +700,10 @@ /// \brief Puts all tokens into a single line. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { unsigned Penalty = 0; - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); while (State.NextToken) { formatChildren(State, /*Newline=*/false, DryRun, Penalty); Indenter->addTokenToState( @@ -721,8 +725,9 @@ /// \brief Formats the line by finding the best line breaks with line lengths /// below the column limit. 
unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + unsigned FirstStartColumn, bool DryRun) override { + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); // If the ObjC method declaration does not fit on a line, we should format // it with one arg per line. @@ -888,7 +893,9 @@ unsigned UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, bool DryRun, int AdditionalIndent, - bool FixBadIndentation) { + bool FixBadIndentation, + unsigned FirstStartColumn, + unsigned NextStartColumn) { LineJoiner Joiner(Style, Keywords, Lines); // Try to look up already computed penalty in DryRun-mode. @@ -908,9 +915,10 @@ // The minimum level of consecutive lines that have been formatted. unsigned RangeMinLevel = UINT_MAX; + bool FirstLine = true; for (const AnnotatedLine *Line = Joiner.getNextMergedLine(DryRun, IndentTracker); - Line; Line = NextLine) { + Line; Line = NextLine, FirstLine = false) { const AnnotatedLine &TheLine = *Line; unsigned Indent = IndentTracker.getIndent(); @@ -944,16 +952,18 @@ (TheLine.Type == LT_ImportStatement && (Style.Language != FormatStyle::LK_JavaScript || !Style.JavaScriptWrapImports)); - if (Style.ColumnLimit == 0) NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else if (FitsIntoOneLine) Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? 
FirstStartColumn : 0, DryRun); RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); } else { // If no token in the current line is affected, we still need to format Index: lib/Format/UnwrappedLineParser.h =================================================================== --- lib/Format/UnwrappedLineParser.h +++ lib/Format/UnwrappedLineParser.h @@ -56,6 +56,8 @@ size_t MatchingOpeningBlockLineIndex; static const size_t kInvalidIndex = -1; + + unsigned FirstStartColumn = 0; }; class UnwrappedLineConsumer { @@ -71,6 +73,7 @@ public: UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef Tokens, UnwrappedLineConsumer &Callback); @@ -235,6 +238,8 @@ // sequence. std::stack PPChainBranchIndex; + unsigned FirstStartColumn; + friend class ScopedLineState; friend class CompoundStatementIndenter; }; Index: lib/Format/UnwrappedLineParser.cpp =================================================================== --- lib/Format/UnwrappedLineParser.cpp +++ lib/Format/UnwrappedLineParser.cpp @@ -226,12 +226,14 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), Style(Style), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), - Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} + Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), + FirstStartColumn(FirstStartColumn) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -243,10 +245,12 @@ CurrentLines = &Lines; DeclarationScopeStack.clear(); PPStack.clear(); + Line->FirstStartColumn = FirstStartColumn; } void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); + Line->FirstStartColumn = FirstStartColumn; do { DEBUG(llvm::dbgs() << "----\n"); reset(); @@ -2108,7 +2112,8 @@ 
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix = "") { - llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" + llvm::dbgs() << Prefix << "Line(" << Line.Level + << ", FSC=" << Line.FirstStartColumn << ")" << (Line.InPPDirective ? " MACRO" : "") << ": "; for (std::list::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2141,6 +2146,7 @@ CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; + Line->FirstStartColumn = 0; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), Index: lib/Format/WhitespaceManager.h =================================================================== --- lib/Format/WhitespaceManager.h +++ lib/Format/WhitespaceManager.h @@ -57,6 +57,8 @@ /// was not called. void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); + llvm::Error addReplacement(const tooling::Replacement &Replacement); + /// \brief Inserts or replaces whitespace in the middle of a token. 
/// /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix Index: lib/Format/WhitespaceManager.cpp =================================================================== --- lib/Format/WhitespaceManager.cpp +++ lib/Format/WhitespaceManager.cpp @@ -67,6 +67,11 @@ /*IsInsideToken=*/false)); } +llvm::Error +WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) { + return Replaces.add(Replacement); +} + void WhitespaceManager::replaceWhitespaceInToken( const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, Index: unittests/Format/CMakeLists.txt =================================================================== --- unittests/Format/CMakeLists.txt +++ unittests/Format/CMakeLists.txt @@ -10,6 +10,7 @@ FormatTestJava.cpp FormatTestObjC.cpp FormatTestProto.cpp + FormatTestRawStrings.cpp FormatTestSelective.cpp FormatTestTextProto.cpp NamespaceEndCommentsFixerTest.cpp Index: unittests/Format/FormatTestRawStrings.cpp =================================================================== --- /dev/null +++ unittests/Format/FormatTestRawStrings.cpp @@ -0,0 +1,190 @@ +//===- unittest/Format/FormatTestRawStrings.cpp - Formatting unit tests ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Format/Format.h" + +#include "../Tooling/ReplacementTest.h" +#include "FormatTestUtils.h" + +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryBuffer.h" +#include "gtest/gtest.h" + +#define DEBUG_TYPE "format-test" + +using clang::tooling::ReplacementTest; +using clang::tooling::toReplacements; + +namespace clang { +namespace format { +namespace { + +FormatStyle getGoogleStyle() { return getGoogleStyle(FormatStyle::LK_Cpp); } + +class FormatTestRawStrings : public ::testing::Test { +protected: + enum StatusCheck { + SC_ExpectComplete, + SC_ExpectIncomplete, + SC_DoNotCheck + }; + + std::string format(llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle(), + StatusCheck CheckComplete = SC_ExpectComplete) { + DEBUG(llvm::errs() << "---\n"); + DEBUG(llvm::errs() << Code << "\n\n"); + std::vector Ranges(1, tooling::Range(0, Code.size())); + FormattingAttemptStatus Status; + tooling::Replacements Replaces = + reformat(Style, Code, Ranges, "", &Status); + if (CheckComplete != SC_DoNotCheck) { + bool ExpectedCompleteFormat = CheckComplete == SC_ExpectComplete; + EXPECT_EQ(ExpectedCompleteFormat, Status.FormatComplete) + << Code << "\n\n"; + } + ReplacementCount = Replaces.size(); + auto Result = applyAllReplacements(Code, Replaces); + EXPECT_TRUE(static_cast(Result)); + DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); + return *Result; + } + + FormatStyle getStyleWithColumns(FormatStyle Style, unsigned ColumnLimit) { + Style.ColumnLimit = ColumnLimit; + return Style; + } + + FormatStyle getLLVMStyleWithColumns(unsigned ColumnLimit) { + return getStyleWithColumns(getLLVMStyle(), ColumnLimit); + } + + FormatStyle getGoogleStyleWithColumns(unsigned ColumnLimit) { + return getStyleWithColumns(getGoogleStyle(), ColumnLimit); + } + + void verifyFormat(llvm::StringRef Code, + const FormatStyle &Style = 
getLLVMStyle()) { + EXPECT_EQ(Code.str(), format(test::messUp(Code), Style)); + if (Style.Language == FormatStyle::LK_Cpp) { + // Objective-C++ is a superset of C++, so everything checked for C++ + // needs to be checked for Objective-C++ as well. + FormatStyle ObjCStyle = Style; + ObjCStyle.Language = FormatStyle::LK_ObjC; + EXPECT_EQ(Code.str(), format(test::messUp(Code), ObjCStyle)); + } + } + + void verifyIncompleteFormat(llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + EXPECT_EQ(Code.str(), + format(test::messUp(Code), Style, SC_ExpectIncomplete)); + } + + void verifyGoogleFormat(llvm::StringRef Code) { + verifyFormat(Code, getGoogleStyle()); + } + + void verifyIndependentOfContext(llvm::StringRef text) { + verifyFormat(text); + verifyFormat(llvm::Twine("void f() { " + text + " }").str()); + } + + /// \brief Verify that clang-format does not crash on the given input. + void verifyNoCrash(llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + format(Code, Style, SC_DoNotCheck); + } + + int ReplacementCount; + + FormatStyle getRawStringPbStyleWithColumns(unsigned Columns) { + FormatStyle Style = getLLVMStyleWithColumns(Columns); + Style.RawStringFormats = {{/*Delimiter=*/"pb", + /*Kind=*/FormatStyle::LK_TextProto}}; + return Style; + } +}; + +TEST_F(FormatTestRawStrings, ReformatsShortRawStringsOnSingleLine) { + EXPECT_EQ( + R"test(P p = TP(R"pb()pb");)test", + format( + R"test(P p = TP(R"pb( )pb");)test", + getRawStringPbStyleWithColumns(40))); + EXPECT_EQ( + R"test(P p = TP(R"pb(item_1: 1)pb");)test", + format( + R"test(P p = TP(R"pb(item_1:1)pb");)test", + getRawStringPbStyleWithColumns(40))); + EXPECT_EQ( + R"test(P p = TP(R"pb(item_1: 1)pb");)test", + format( + R"test(P p = TP(R"pb( item_1 : 1 )pb");)test", + getRawStringPbStyleWithColumns(40))); + EXPECT_EQ( + R"test(P p = TP(R"pb(item_1: 1 item_2: 2)pb");)test", + format( + R"test(P p = TP(R"pb(item_1:1 item_2:2)pb");)test", + getRawStringPbStyleWithColumns(40))); + 
EXPECT_EQ( + R"test(P p = TP(R"pb(item_1 <1> item_2: {2})pb");)test", + format( + R"test(P p = TP(R"pb(item_1<1> item_2:{2})pb");)test", + getRawStringPbStyleWithColumns(40))); +} + +TEST_F(FormatTestRawStrings, BreaksRawStringsExceedingColumnLimit) { + EXPECT_EQ(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, + item_2: 2)pb");)test", + format(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, item_2: 2)pb");)test", + getRawStringPbStyleWithColumns(40))); + + EXPECT_EQ(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, + item_2: 2, + item_3: 3)pb");)test", + format(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, item_2: 2, item_3: 3)pb");)test", + getRawStringPbStyleWithColumns(40))); + + EXPECT_EQ( + R"test(P p = TP(R"pb(item_1 <1> + item_2: <2> + item_3 {})pb");)test", + format( + R"test(P p = TP(R"pb(item_1<1> item_2:<2> item_3{ })pb");)test", + getRawStringPbStyleWithColumns(40))); + EXPECT_EQ( + R"test(P p = TP(R"pb(item_1: 1, + item_2: 2, + item_3: 3, + item_4: 4)pb");)test", + format( + R"test(P p = TP(R"pb(item_1: 1, item_2: 2, item_3: 3, item_4: 4)pb");)test", + getRawStringPbStyleWithColumns(40))); +} + +TEST_F(FormatTestRawStrings, TODO) { + EXPECT_EQ(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1 <1>, + item_2: {2}, + item_3: <3>, + item_4: {4})pb");)test", + format(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1<1>, item_2: {2}, item_3: <3>, item_4:{4})pb");)test", + getRawStringPbStyleWithColumns(40))); +} + +} // end namespace +} // end namespace format +} // end namespace clang