diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h --- a/llvm/include/llvm/Support/FormatVariadic.h +++ b/llvm/include/llvm/Support/FormatVariadic.h @@ -43,6 +43,7 @@ namespace llvm { enum class ReplacementType { Empty, Format, Literal }; +enum class FormatEscapeMode { Legacy, Strict }; struct ReplacementItem { ReplacementItem() = default; @@ -66,23 +67,25 @@ protected: StringRef Fmt; ArrayRef Adapters; + FormatEscapeMode EscapeMode; static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where, size_t &Align, char &Pad); static std::pair - splitLiteralAndReplacement(StringRef Fmt); + splitLiteralAndReplacement(StringRef Fmt, FormatEscapeMode EscapeMode); formatv_object_base(StringRef Fmt, - ArrayRef Adapters) - : Fmt(Fmt), Adapters(Adapters) {} + ArrayRef Adapters, + FormatEscapeMode EscapeMode = FormatEscapeMode::Legacy) + : Fmt(Fmt), Adapters(Adapters), EscapeMode(EscapeMode) {} formatv_object_base(formatv_object_base const &rhs) = delete; formatv_object_base(formatv_object_base &&rhs) = default; public: void format(raw_ostream &S) const { - for (auto &R : parseFormatString(Fmt)) { + for (auto &R : parseFormatString(Fmt, EscapeMode)) { if (R.Type == ReplacementType::Empty) continue; if (R.Type == ReplacementType::Literal) { @@ -100,7 +103,9 @@ Align.format(S, R.Options); } } - static SmallVector parseFormatString(StringRef Fmt); + static SmallVector + parseFormatString(StringRef Fmt, + FormatEscapeMode EscapeMode = FormatEscapeMode::Legacy); static Optional parseReplacementItem(StringRef Spec); @@ -148,8 +153,9 @@ }; public: - formatv_object(StringRef Fmt, Tuple &&Params) - : formatv_object_base(Fmt, ParameterPointers), + formatv_object(StringRef Fmt, Tuple &&Params, + FormatEscapeMode EscapeMode = FormatEscapeMode::Legacy) + : formatv_object_base(Fmt, ParameterPointers, EscapeMode), Parameters(std::move(Params)) { ParameterPointers = apply_tuple(create_adapters(), Parameters); } @@ -246,14 +252,36 @@ // assertion. Otherwise, it will try to do something reasonable, but in general // the details of what that is are undefined. // +namespace detail { + template -inline auto formatv(const char *Fmt, Ts &&... Vals) -> formatv_object(Vals))...))> { +inline auto formatv_impl(const char *Fmt, FormatEscapeMode EscapeMode, + Ts &&... Vals) + -> formatv_object(Vals))...))> { using ParamTuple = decltype( std::make_tuple(detail::build_format_adapter(std::forward(Vals))...)); return formatv_object( Fmt, - std::make_tuple(detail::build_format_adapter(std::forward(Vals))...)); + std::make_tuple(detail::build_format_adapter(std::forward(Vals))...), + EscapeMode); +} + +} // end namespace detail + +template +inline auto formatv(const char *Fmt, Ts &&... Vals) -> formatv_object(Vals))...))> { + return detail::formatv_impl(Fmt, FormatEscapeMode::Legacy, + std::forward(Vals)...); +} + +template +inline auto formatv_strict(const char *Fmt, Ts &&... Vals) + -> formatv_object(Vals))...))> { + return detail::formatv_impl(Fmt, FormatEscapeMode::Strict, + std::forward(Vals)...); } } // end namespace llvm diff --git a/llvm/lib/Support/FormatVariadic.cpp b/llvm/lib/Support/FormatVariadic.cpp --- a/llvm/lib/Support/FormatVariadic.cpp +++ b/llvm/lib/Support/FormatVariadic.cpp @@ -89,25 +89,56 @@ return ReplacementItem{Spec, Index, Align, Where, Pad, Options}; } +/// Parse the prefix of the string containing the first (open or close) brace +/// including any escaped braces following it. +static Optional> +parseBracePrefix(StringRef Fmt, std::size_t Pos, char Brace) { + // If the brace is not at the start, then everything up until the brace is a + // literal. + if (Pos != 0) + return std::make_pair(ReplacementItem{Fmt.substr(0, Pos)}, Fmt.substr(Pos)); + + // If there is one or more brace, then some of them may be escaped. Treat + // these as replacements. + StringRef Braces = Fmt.take_while([=](char C) { return C == Brace; }); + if (Braces.size() > 1) { + size_t NumEscapedBraces = Braces.size() / 2; + StringRef Middle = Fmt.substr(Pos, NumEscapedBraces); + StringRef Right = Fmt.drop_front(Pos + NumEscapedBraces * 2); + return std::make_pair(ReplacementItem{Middle}, Right); + } + + // Found a single brace. Let the caller handle this case. + return llvm::None; +} + std::pair -formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) { +formatv_object_base::splitLiteralAndReplacement(StringRef Fmt, + FormatEscapeMode EscapeMode) { std::size_t From = 0; while (From < Fmt.size() && From != StringRef::npos) { std::size_t BO = Fmt.find_first_of('{', From); - // Everything up until the first brace is a literal. - if (BO != 0) - return std::make_pair(ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO)); - - StringRef Braces = - Fmt.drop_front(BO).take_while([](char C) { return C == '{'; }); - // If there is more than one brace, then some of them are escaped. Treat - // these as replacements. - if (Braces.size() > 1) { - size_t NumEscapedBraces = Braces.size() / 2; - StringRef Middle = Fmt.substr(BO, NumEscapedBraces); - StringRef Right = Fmt.drop_front(BO + NumEscapedBraces * 2); - return std::make_pair(ReplacementItem{Middle}, Right); + + if (EscapeMode == FormatEscapeMode::Strict) { + std::size_t BC = Fmt.find_first_of('}', From); + // Found a closing brace before an opening brace. + if (BC < BO) { + auto replacement = parseBracePrefix(Fmt, BC, '}'); + if (replacement) + return replacement.getValue(); + // Found a closing brace without a matching opening brace before it. We + // treat the rest of the string as a literal replacement, but we assert + // to indicate that this is undefined and that we consider it an error. + assert(false && "Unterminated brace sequence. Escape with }} for a " + "literal brace."); + return std::make_pair(ReplacementItem{Fmt}, StringRef()); + } } + + auto replacement = parseBracePrefix(Fmt, BO, '{'); + if (replacement) + return replacement.getValue(); + // An unterminated open brace is undefined. We treat the rest of the string // as a literal replacement, but we assert to indicate that this is // undefined and that we consider it an error. @@ -142,11 +173,12 @@ } SmallVector -formatv_object_base::parseFormatString(StringRef Fmt) { +formatv_object_base::parseFormatString(StringRef Fmt, + FormatEscapeMode EscapeMode) { SmallVector Replacements; ReplacementItem I; while (!Fmt.empty()) { - std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt); + std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt, EscapeMode); if (I.Type != ReplacementType::Empty) Replacements.push_back(I); } diff --git a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp --- a/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp +++ b/llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp @@ -54,10 +54,10 @@ const char *ModuleString = "define i32 @g(i32 %x) !prof !21 {{\n" " ret i32 0\n" - "}\n" + "}}\n" "define i32 @h(i32 %x) !prof !22 {{\n" " ret i32 0\n" - "}\n" + "}}\n" "define i32 @f(i32 %x) !prof !20 {{\n" "bb0:\n" " %y1 = icmp eq i32 %x, 0 \n" @@ -71,7 +71,7 @@ "bb3:\n" " %y2 = phi i32 [0, %bb1], [1, %bb2] \n" " ret i32 %y2\n" - "}\n" + "}}\n" "define i32 @l(i32 %x) {{\n" "bb0:\n" " %y1 = icmp eq i32 %x, 0 \n" @@ -85,39 +85,41 @@ "bb3:\n" " %y2 = phi i32 [0, %bb1], [1, %bb2] \n" " ret i32 %y2\n" - "}\n" - "!20 = !{{!\"function_entry_count\", i64 400}\n" - "!21 = !{{!\"function_entry_count\", i64 1}\n" - "!22 = !{{!\"function_entry_count\", i64 100}\n" - "!23 = !{{!\"branch_weights\", i32 64, i32 4}\n" + "}}\n" + "!20 = !{{!\"function_entry_count\", i64 400}}\n" + "!21 = !{{!\"function_entry_count\", i64 1}}\n" + "!22 = !{{!\"function_entry_count\", i64 100}}\n" + "!23 = !{{!\"branch_weights\", i32 64, i32 4}}\n" "{0}"; const char *SummaryString = - "!llvm.module.flags = !{{!1}\n" - "!1 = !{{i32 1, !\"ProfileSummary\", !2}\n" - "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}\n" - "!3 = !{{!\"ProfileFormat\", !\"{0}\"}\n" - "!4 = !{{!\"TotalCount\", i64 10000}\n" - "!5 = !{{!\"MaxCount\", i64 10}\n" - "!6 = !{{!\"MaxInternalCount\", i64 1}\n" - "!7 = !{{!\"MaxFunctionCount\", i64 1000}\n" - "!8 = !{{!\"NumCounts\", i64 {1}}\n" - "!9 = !{{!\"NumFunctions\", i64 3}\n" - "!10 = !{{!\"IsPartialProfile\", i64 {2}}\n" - "!11 = !{{!\"PartialProfileRatio\", double {3}}\n" - "!12 = !{{!\"DetailedSummary\", !13}\n" - "!13 = !{{!14, !15, !16}\n" - "!14 = !{{i32 10000, i64 1000, i32 1}\n" - "!15 = !{{i32 990000, i64 300, i32 {4}}\n" - "!16 = !{{i32 999999, i64 5, i32 {5}}\n"; + "!llvm.module.flags = !{{!1}}\n" + "!1 = !{{i32 1, !\"ProfileSummary\", !2}}\n" + "!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}}\n" + "!3 = !{{!\"ProfileFormat\", !\"{0}\"}}\n" + "!4 = !{{!\"TotalCount\", i64 10000}}\n" + "!5 = !{{!\"MaxCount\", i64 10}}\n" + "!6 = !{{!\"MaxInternalCount\", i64 1}}\n" + "!7 = !{{!\"MaxFunctionCount\", i64 1000}}\n" + "!8 = !{{!\"NumCounts\", i64 {1}}}\n" + "!9 = !{{!\"NumFunctions\", i64 3}}\n" + "!10 = !{{!\"IsPartialProfile\", i64 {2}}}\n" + "!11 = !{{!\"PartialProfileRatio\", double {3}}}\n" + "!12 = !{{!\"DetailedSummary\", !13}}\n" + "!13 = !{{!14, !15, !16}}\n" + "!14 = !{{i32 10000, i64 1000, i32 1}}\n" + "!15 = !{{i32 990000, i64 300, i32 {4}}}\n" + "!16 = !{{i32 999999, i64 5, i32 {5}}}\n"; SMDiagnostic Err; if (ProfKind) { auto Summary = - formatv(SummaryString, ProfKind, NumCounts, IsPartialProfile, - PartialProfileRatio, HotNumCounts, ColdNumCounts) + formatv_strict(SummaryString, ProfKind, NumCounts, IsPartialProfile, + PartialProfileRatio, HotNumCounts, ColdNumCounts) .str(); - return parseAssemblyString(formatv(ModuleString, Summary).str(), Err, C); + return parseAssemblyString(formatv_strict(ModuleString, Summary).str(), + Err, C); } else - return parseAssemblyString(formatv(ModuleString, "").str(), Err, C); + return parseAssemblyString(formatv_strict(ModuleString, "").str(), Err, + C); } }; diff --git a/llvm/unittests/Support/FormatVariadicTest.cpp b/llvm/unittests/Support/FormatVariadicTest.cpp --- a/llvm/unittests/Support/FormatVariadicTest.cpp +++ b/llvm/unittests/Support/FormatVariadicTest.cpp @@ -60,6 +60,20 @@ ASSERT_EQ(1u, Replacements.size()); EXPECT_EQ("{{{", Replacements[0].Spec); EXPECT_EQ(ReplacementType::Literal, Replacements[0].Type); + + // }} should be replaced with } + Replacements = + formatv_object_base::parseFormatString("}}", FormatEscapeMode::Strict); + ASSERT_EQ(1u, Replacements.size()); + EXPECT_EQ("}", Replacements[0].Spec); + EXPECT_EQ(ReplacementType::Literal, Replacements[0].Type); + + // An even number N of braces should be replaced with N/2 braces. + Replacements = formatv_object_base::parseFormatString( + "}}}}}}", FormatEscapeMode::Strict); + ASSERT_EQ(1u, Replacements.size()); + EXPECT_EQ("}}}", Replacements[0].Spec); + EXPECT_EQ(ReplacementType::Literal, Replacements[0].Type); } TEST(FormatVariadicTest, ValidReplacementSequence) { @@ -504,8 +518,8 @@ .08215f, (void *)nullptr, 0, 6.62E-34, -908234908423, 908234908422234, std::numeric_limits::infinity(), 0x0)}; // Test long string formatting with many edge cases combined. - const char *Intro = - "There are {{{0}} items in the tuple, and {{{1}} tuple(s) in the array."; + const char *Intro = "There are {{{0}}} items in the tuple, and {{{1}}} " + "tuple(s) in the array."; const char *Header = "{0,6}|{1,8}|{2,=10}|{3,=10}|{4,=13}|{5,7}|{6,7}|{7,10}|{8," "-7}|{9,10}|{10,16}|{11,17}|{12,6}|{13,4}"; @@ -516,8 +530,8 @@ std::string S; llvm::raw_string_ostream Stream(S); - Stream << formatv(Intro, std::tuple_size::value, - llvm::array_lengthof(Ts)) + Stream << formatv_strict(Intro, std::tuple_size::value, + llvm::array_lengthof(Ts)) << "\n"; Stream << formatv(Header, "Char", "HexInt", "Str", "Ref", "std::str", "double", "float", "pointer", "comma", "exp", "bigint",