Index: include/clang/Format/Format.h =================================================================== --- include/clang/Format/Format.h +++ include/clang/Format/Format.h @@ -1673,6 +1673,29 @@ /// \brief The number of columns used for tab stops. unsigned TabWidth; + /// \brief A vector of macros that should be interpreted as macros expanding + /// to a string literal encoding prefix instead of as function calls. + /// + /// Some libraries provide macro(s) to change the encoding prefix of string + /// literals depending on configuration, for example, _T() macro on Microsoft + /// platforms. When splitting string literals, the macro should be applied to + /// each fragment of the literal to apply the same encoding prefix to all of + /// them, which requires special treatment from clang-format. This option + /// lists the names of these special macros. + /// + /// These are expected to be macros of the form: + /// \code + /// _T("...some string...") + /// \endcode + /// + /// In the .clang-format configuration file, this can be configured like: + /// \code{.yaml} + /// TMacros: ['_T', 'myT'] + /// \endcode + /// + /// For example: _T. + std::vector TMacros; + /// \brief Different ways to use tab in formatting. enum UseTabStyle { /// Never use tab. @@ -1781,7 +1804,7 @@ SpacesInParentheses == R.SpacesInParentheses && SpacesInSquareBrackets == R.SpacesInSquareBrackets && Standard == R.Standard && TabWidth == R.TabWidth && - UseTab == R.UseTab; + TMacros == R.TMacros && UseTab == R.UseTab; } llvm::Optional GetLanguageStyle(LanguageKind Language) const; Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1599,12 +1599,10 @@ // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to // reduce the overhead) for each FormatToken, which is a string, so that we // don't run multiple checks here on the hot path. 
- if ((Text.endswith(Postfix = "\"") && - (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || - Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || - Text.startswith(Prefix = "u8\"") || - Text.startswith(Prefix = "L\""))) || - (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { + if (Text.endswith(Postfix = "\"") && + (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || + Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || + Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) { // We need this to address the case where there is an unbreakable tail // only if certain other formatting decisions have been taken. The // UnbreakableTailLength of Current is an overapproximation is that case @@ -1616,6 +1614,25 @@ Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style); } + if (Current.TMacroStringLiteral && Text.endswith(Postfix = "\")")) { + // If we're inside tmacro we know that we have something like _T(" at the + // beginning of the literal. + size_t pos = Text.find('"'); + if (pos != StringRef::npos) { + Prefix = Text.substr(0, pos + 1); + + // We need this to address the case where there is an unbreakable tail + // only if certain other formatting decisions have been taken. The + // UnbreakableTailLength of Current is an overapproximation in that case + // and we need to be correct here. + unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) + ? 
0 + : Current.UnbreakableTailLength; + return llvm::make_unique( + Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, + State.Line->InPPDirective, Encoding, Style); + } + } } else if (Current.is(TT_BlockComment)) { if (!Style.ReflowComments || // If a comment token switches formatting, like Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -430,6 +430,7 @@ IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); IO.mapOptional("Standard", Style.Standard); IO.mapOptional("TabWidth", Style.TabWidth); + IO.mapOptional("TMacros", Style.TMacros); IO.mapOptional("UseTab", Style.UseTab); } }; @@ -686,6 +687,7 @@ LLVMStyle.DisableFormat = false; LLVMStyle.SortIncludes = true; LLVMStyle.SortUsingDeclarations = true; + LLVMStyle.TMacros.push_back("_T"); return LLVMStyle; } Index: lib/Format/FormatToken.h =================================================================== --- lib/Format/FormatToken.h +++ lib/Format/FormatToken.h @@ -134,6 +134,9 @@ /// Token. bool HasUnescapedNewline = false; + /// \brief Whether this is a string literal similar to _T("sdfsdf"). + bool TMacroStringLiteral = false; + /// \brief The range of the whitespace immediately preceding the \c Token. 
SourceRange WhitespaceRange; Index: lib/Format/FormatTokenLexer.cpp =================================================================== --- lib/Format/FormatTokenLexer.cpp +++ lib/Format/FormatTokenLexer.cpp @@ -368,7 +368,8 @@ return false; FormatToken *Macro = Tokens[Tokens.size() - 4]; - if (Macro->TokenText != "_T") + if (std::find(Style.TMacros.begin(), Style.TMacros.end(), Macro->TokenText) == + Style.TMacros.end()) return false; const char *Start = Macro->TokenText.data(); @@ -382,6 +383,7 @@ String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); String->NewlinesBefore = Macro->NewlinesBefore; String->HasUnescapedNewline = Macro->HasUnescapedNewline; + String->TMacroStringLiteral = true; Tokens.pop_back(); Tokens.pop_back(); Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -7935,6 +7935,48 @@ "_T(\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXn\"));")); } +TEST_F(FormatTest, BreaksStringLiteralsWithin_GenericTMacro) { + FormatStyle Style = getLLVMStyleWithColumns(25); + Style.TMacros.push_back("blablaT"); + EXPECT_EQ( + "blablaT(\"aaaaaaaaaaaaaa\")\n" + "blablaT(\"aaaaaaaaaaaaaa\")\n" + "blablaT(\"aaaaaaaaaaaa\")", + format(" blablaT(\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\")", Style)); + EXPECT_EQ("f(x,\n" + " blablaT(\"aaaaaaaaaaaa\")\n" + " blablaT(\"aaa\"),\n" + " z);", + format("f(x, blablaT(\"aaaaaaaaaaaaaaa\"), z);", Style)); + + // FIXME: Handle embedded spaces in one iteration. 
+ // EXPECT_EQ("blablaT(\"aaaaaaaaaaaaa\")\n" + // "blablaT(\"aaaaaaaaaaaaa\")\n" + // "blablaT(\"aaaaaaaaaaaaa\")\n" + // "blablaT(\"a\")", + // format(" blablaT ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", + // getLLVMStyleWithColumns(20))); + EXPECT_EQ( + "blablaT ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", + format(" blablaT ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", Style)); + EXPECT_EQ("f(\n" + "#if !TEST\n" + " blablaT(\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXn\")\n" + "#endif\n" + ");", + format("f(\n" + "#if !TEST\n" + "blablaT(\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXn\")\n" + "#endif\n" + ");")); + EXPECT_EQ("f(\n" + "\n" + " blablaT(\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXn\"));", + format("f(\n" + "\n" + "blablaT(\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXn\"));")); +} + TEST_F(FormatTest, BreaksStringLiteralOperands) { // In a function call with two operands, the second can be broken with no line // break before it. @@ -10647,6 +10689,10 @@ " - 'CPPEVAL'\n" " CanonicalDelimiter: 'cc'", RawStringFormats, ExpectedRawStringFormats); + + Style.TMacros.clear(); + std::vector ExpectedTMacros = {"_T", "myT"}; + CHECK_PARSE("TMacros: [_T, myT]", TMacros, ExpectedTMacros); } TEST_F(FormatTest, ParsesConfigurationWithLanguages) {