Skip to content

Commit 412ed09

Browse files
committed Jan 19, 2018
[clang-format] Adds a canonical delimiter to raw string formatting
Summary: This patch adds canonical delimiter support to the raw string formatting. This allows matching delimiters to be updated to the canonical one. Reviewers: bkramer Reviewed By: bkramer Subscribers: klimek, cfe-commits Differential Revision: https://reviews.llvm.org/D42187 llvm-svn: 322956
1 parent 33cb845 commit 412ed09

File tree

6 files changed

+122
-25
lines changed

6 files changed

+122
-25
lines changed
 

‎clang/docs/ClangFormatStyleOptions.rst

+4
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,9 @@ the configuration (without a prefix: ``Auto``).
15901590
precedence over a matching enclosing function name for determining the
15911591
language of the raw string contents.
15921592

1593+
If a canonical delimiter is specified, occurrences of other delimiters for
1594+
the same language will be updated to the canonical if possible.
1595+
15931596
There should be at most one specification per language and each delimiter
15941597
and enclosing function should not occur in multiple specifications.
15951598

@@ -1610,6 +1613,7 @@ the configuration (without a prefix: ``Auto``).
16101613
- 'cc'
16111614
- 'cpp'
16121615
BasedOnStyle: llvm
1616+
CanonicalDelimiter: 'cc'
16131617
16141618
**ReflowComments** (``bool``)
16151619
If ``true``, clang-format will attempt to re-flow comments.

‎clang/include/clang/Format/Format.h

+7
Original file line numberDiff line numberDiff line change
@@ -1369,13 +1369,16 @@ struct FormatStyle {
13691369
std::vector<std::string> Delimiters;
13701370
/// \brief A list of enclosing function names that match this language.
13711371
std::vector<std::string> EnclosingFunctions;
1372+
/// \brief The canonical delimiter for this language.
1373+
std::string CanonicalDelimiter;
13721374
/// \brief The style name on which this raw string format is based.
13731375
/// If not specified, the raw string format is based on the style that this
13741376
/// format is based on.
13751377
std::string BasedOnStyle;
13761378
bool operator==(const RawStringFormat &Other) const {
13771379
return Language == Other.Language && Delimiters == Other.Delimiters &&
13781380
EnclosingFunctions == Other.EnclosingFunctions &&
1381+
CanonicalDelimiter == Other.CanonicalDelimiter &&
13791382
BasedOnStyle == Other.BasedOnStyle;
13801383
}
13811384
};
@@ -1392,6 +1395,9 @@ struct FormatStyle {
13921395
/// precedence over a matching enclosing function name for determining the
13931396
/// language of the raw string contents.
13941397
///
1398+
/// If a canonical delimiter is specified, occurrences of other delimiters for
1399+
/// the same language will be updated to the canonical if possible.
1400+
///
13951401
/// There should be at most one specification per language and each delimiter
13961402
/// and enclosing function should not occur in multiple specifications.
13971403
///
@@ -1410,6 +1416,7 @@ struct FormatStyle {
14101416
/// - 'cc'
14111417
/// - 'cpp'
14121418
/// BasedOnStyle: llvm
1419+
/// CanonicalDelimiter: 'cc'
14131420
/// \endcode
14141421
std::vector<RawStringFormat> RawStringFormats;
14151422

‎clang/lib/Format/ContinuationIndenter.cpp

+64-12
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,18 @@ static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
102102
return Delimiter;
103103
}
104104

105+
// Returns the canonical delimiter for \p Language, or the empty string if no
106+
// canonical delimiter is specified.
107+
static StringRef
108+
getCanonicalRawStringDelimiter(const FormatStyle &Style,
109+
FormatStyle::LanguageKind Language) {
110+
for (const auto &Format : Style.RawStringFormats) {
111+
if (Format.Language == Language)
112+
return StringRef(Format.CanonicalDelimiter);
113+
}
114+
return "";
115+
}
116+
105117
RawStringFormatStyleManager::RawStringFormatStyleManager(
106118
const FormatStyle &CodeStyle) {
107119
for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
@@ -1312,14 +1324,32 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
13121324
const FormatToken &Current, LineState &State,
13131325
const FormatStyle &RawStringStyle, bool DryRun) {
13141326
unsigned StartColumn = State.Column - Current.ColumnWidth;
1315-
auto Delimiter = *getRawStringDelimiter(Current.TokenText);
1327+
StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText);
1328+
StringRef NewDelimiter =
1329+
getCanonicalRawStringDelimiter(Style, RawStringStyle.Language);
1330+
if (NewDelimiter.empty() || OldDelimiter.empty())
1331+
NewDelimiter = OldDelimiter;
13161332
// The text of a raw string is between the leading 'R"delimiter(' and the
13171333
// trailing 'delimiter)"'.
1318-
unsigned PrefixSize = 3 + Delimiter.size();
1319-
unsigned SuffixSize = 2 + Delimiter.size();
1334+
unsigned OldPrefixSize = 3 + OldDelimiter.size();
1335+
unsigned OldSuffixSize = 2 + OldDelimiter.size();
1336+
// We create a virtual text environment which expects a null-terminated
1337+
// string, so we cannot use StringRef.
1338+
std::string RawText =
1339+
Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize);
1340+
if (NewDelimiter != OldDelimiter) {
1341+
// Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the
1342+
// raw string.
1343+
std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str();
1344+
if (StringRef(RawText).contains(CanonicalDelimiterSuffix))
1345+
NewDelimiter = OldDelimiter;
1346+
}
1347+
1348+
unsigned NewPrefixSize = 3 + NewDelimiter.size();
1349+
unsigned NewSuffixSize = 2 + NewDelimiter.size();
13201350

1321-
// The first start column is the column the raw text starts.
1322-
unsigned FirstStartColumn = StartColumn + PrefixSize;
1351+
// The first start column is the column the raw text starts after formatting.
1352+
unsigned FirstStartColumn = StartColumn + NewPrefixSize;
13231353

13241354
// The next start column is the intended indentation a line break inside
13251355
// the raw string at level 0. It is determined by the following rules:
@@ -1330,7 +1360,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
13301360
// These rules have the advantage that the formatted content both does not
13311361
// violate the rectangle rule and visually flows within the surrounding
13321362
// source.
1333-
bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';
1363+
bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n';
13341364
unsigned NextStartColumn = ContentStartsOnNewline
13351365
? State.Stack.back().Indent + Style.IndentWidth
13361366
: FirstStartColumn;
@@ -1344,12 +1374,9 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
13441374
// - if the raw string prefix does not start on a newline, it is the current
13451375
// indent.
13461376
unsigned LastStartColumn = Current.NewlinesBefore
1347-
? FirstStartColumn - PrefixSize
1377+
? FirstStartColumn - NewPrefixSize
13481378
: State.Stack.back().Indent;
13491379

1350-
std::string RawText =
1351-
Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);
1352-
13531380
std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
13541381
RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
13551382
FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
@@ -1362,8 +1389,33 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
13621389
return 0;
13631390
}
13641391
if (!DryRun) {
1392+
if (NewDelimiter != OldDelimiter) {
1393+
// In 'R"delimiter(...', the delimiter starts 2 characters after the start
1394+
// of the token.
1395+
SourceLocation PrefixDelimiterStart =
1396+
Current.Tok.getLocation().getLocWithOffset(2);
1397+
auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement(
1398+
SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter));
1399+
if (PrefixErr) {
1400+
llvm::errs()
1401+
<< "Failed to update the prefix delimiter of a raw string: "
1402+
<< llvm::toString(std::move(PrefixErr)) << "\n";
1403+
}
1404+
// In 'R"delimiter(...)delimiter"', the suffix delimiter starts at
1405+
// position length - 1 - |delimiter|.
1406+
SourceLocation SuffixDelimiterStart =
1407+
Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() -
1408+
1 - OldDelimiter.size());
1409+
auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement(
1410+
SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter));
1411+
if (SuffixErr) {
1412+
llvm::errs()
1413+
<< "Failed to update the suffix delimiter of a raw string: "
1414+
<< llvm::toString(std::move(SuffixErr)) << "\n";
1415+
}
1416+
}
13651417
SourceLocation OriginLoc =
1366-
Current.Tok.getLocation().getLocWithOffset(PrefixSize);
1418+
Current.Tok.getLocation().getLocWithOffset(OldPrefixSize);
13671419
for (const tooling::Replacement &Fix : Fixes.first) {
13681420
auto Err = Whitespaces.addReplacement(tooling::Replacement(
13691421
SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
@@ -1376,7 +1428,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
13761428
}
13771429
unsigned RawLastLineEndColumn = getLastLineEndColumn(
13781430
*NewCode, FirstStartColumn, Style.TabWidth, Encoding);
1379-
State.Column = RawLastLineEndColumn + SuffixSize;
1431+
State.Column = RawLastLineEndColumn + NewSuffixSize;
13801432
return Fixes.second;
13811433
}
13821434

‎clang/lib/Format/Format.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ template <> struct MappingTraits<FormatStyle::RawStringFormat> {
459459
IO.mapOptional("Language", Format.Language);
460460
IO.mapOptional("Delimiters", Format.Delimiters);
461461
IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions);
462+
IO.mapOptional("CanonicalDelimiter", Format.CanonicalDelimiter);
462463
IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);
463464
}
464465
};
@@ -713,6 +714,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
713714
"PARSE_TEXT_PROTO",
714715
"ParseTextProto",
715716
},
717+
/*CanonicalDelimiter=*/"",
716718
/*BasedOnStyle=*/"google",
717719
}};
718720
GoogleStyle.SpacesBeforeTrailingComments = 2;

‎clang/unittests/Format/FormatTest.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -10429,13 +10429,15 @@ TEST_F(FormatTest, ParsesConfiguration) {
1042910429
FormatStyle::LK_TextProto,
1043010430
{"pb", "proto"},
1043110431
{"PARSE_TEXT_PROTO"},
10432+
/*CanonicalDelimiter=*/"",
1043210433
"llvm",
1043310434
},
1043410435
{
1043510436
FormatStyle::LK_Cpp,
1043610437
{"cc", "cpp"},
1043710438
{"C_CODEBLOCK", "CPPEVAL"},
10438-
"",
10439+
/*CanonicalDelimiter=*/"cc",
10440+
/*BasedOnStyle=*/"",
1043910441
},
1044010442
};
1044110443

@@ -10453,7 +10455,8 @@ TEST_F(FormatTest, ParsesConfiguration) {
1045310455
" - 'cpp'\n"
1045410456
" EnclosingFunctions:\n"
1045510457
" - 'C_CODEBLOCK'\n"
10456-
" - 'CPPEVAL'\n",
10458+
" - 'CPPEVAL'\n"
10459+
" CanonicalDelimiter: 'cc'",
1045710460
RawStringFormats, ExpectedRawStringFormats);
1045810461
}
1045910462

‎clang/unittests/Format/FormatTestRawStrings.cpp

+40-11
Original file line numberDiff line numberDiff line change
@@ -66,30 +66,41 @@ class FormatTestRawStrings : public ::testing::Test {
6666
FormatStyle Style = getLLVMStyle();
6767
Style.ColumnLimit = ColumnLimit;
6868
Style.RawStringFormats = {
69-
{/*Language=*/FormatStyle::LK_TextProto,
70-
/*Delimiters=*/{"pb"},
71-
/*EnclosingFunctions=*/{},
72-
/*BasedOnStyle=*/"google"},
69+
{
70+
/*Language=*/FormatStyle::LK_TextProto,
71+
/*Delimiters=*/{"pb"},
72+
/*EnclosingFunctions=*/{},
73+
/*CanonicalDelimiter=*/"",
74+
/*BasedOnStyle=*/"google",
75+
},
7376
};
7477
return Style;
7578
}
7679

7780
FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) {
7881
FormatStyle Style = getLLVMStyle();
7982
Style.RawStringFormats = {
80-
{/*Language=*/FormatStyle::LK_Cpp,
81-
/*Delimiters=*/{"cpp"},
82-
/*EnclosingFunctions=*/{}, BasedOnStyle},
83+
{
84+
/*Language=*/FormatStyle::LK_Cpp,
85+
/*Delimiters=*/{"cpp"},
86+
/*EnclosingFunctions=*/{},
87+
/*CanonicalDelimiter=*/"",
88+
BasedOnStyle,
89+
},
8390
};
8491
return Style;
8592
}
8693

8794
FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) {
8895
FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp);
8996
Style.RawStringFormats = {
90-
{/*Language=*/FormatStyle::LK_Cpp,
91-
/*Delimiters=*/{"cpp"},
92-
/*EnclosingFunctions=*/{}, BasedOnStyle},
97+
{
98+
/*Language=*/FormatStyle::LK_Cpp,
99+
/*Delimiters=*/{"cpp"},
100+
/*EnclosingFunctions=*/{},
101+
/*CanonicalDelimiter=*/"",
102+
BasedOnStyle,
103+
},
93104
};
94105
return Style;
95106
}
@@ -131,7 +142,13 @@ TEST_F(FormatTestRawStrings, UsesConfigurationOverBaseStyle) {
131142
EXPECT_EQ(0, parseConfiguration("---\n"
132143
"Language: Cpp\n"
133144
"BasedOnStyle: Google", &Style).value());
134-
Style.RawStringFormats = {{FormatStyle::LK_Cpp, {"cpp"}, {}, "llvm"}};
145+
Style.RawStringFormats = {{
146+
FormatStyle::LK_Cpp,
147+
{"cpp"},
148+
{},
149+
/*CanonicalDelimiter=*/"",
150+
/*BasedOnStyle=*/"llvm",
151+
}};
135152
expect_eq(R"test(int* i = R"cpp(int* j = 0;)cpp";)test",
136153
format(R"test(int * i = R"cpp(int * j = 0;)cpp";)test", Style));
137154
}
@@ -752,6 +769,18 @@ a = ParseTextProto<ProtoType>(R"(key:value)");)test",
752769
Style));
753770
}
754771

772+
TEST_F(FormatTestRawStrings, UpdatesToCanonicalDelimiters) {
773+
FormatStyle Style = getRawStringPbStyleWithColumns(25);
774+
Style.RawStringFormats[0].CanonicalDelimiter = "proto";
775+
expect_eq(R"test(a = R"proto(key: value)proto";)test",
776+
format(R"test(a = R"pb(key:value)pb";)test", Style));
777+
778+
// Don't update to canonical delimiter if it occurs as a raw string suffix in
779+
// the raw string content.
780+
expect_eq(R"test(a = R"pb(key: ")proto")pb";)test",
781+
format(R"test(a = R"pb(key:")proto")pb";)test", Style));
782+
}
783+
755784
} // end namespace
756785
} // end namespace format
757786
} // end namespace clang

0 commit comments

Comments
 (0)
Please sign in to comment.