diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -3313,10 +3313,12 @@ /// Print this object as an equivalent expression. void printAsExpr(llvm::raw_ostream &OS) const; + void printAsExpr(llvm::raw_ostream &OS, const PrintingPolicy &Policy) const; /// Print this object as an initializer suitable for a variable of the /// object's type. void printAsInit(llvm::raw_ostream &OS) const; + void printAsInit(llvm::raw_ostream &OS, const PrintingPolicy &Policy) const; const APValue &getValue() const { return Value; } diff --git a/clang/include/clang/AST/PrettyPrinter.h b/clang/include/clang/AST/PrettyPrinter.h --- a/clang/include/clang/AST/PrettyPrinter.h +++ b/clang/include/clang/AST/PrettyPrinter.h @@ -73,7 +73,8 @@ MSVCFormatting(false), ConstantsAsWritten(false), SuppressImplicitBase(false), FullyQualifiedName(false), PrintCanonicalTypes(false), PrintInjectedClassNameWithArguments(true), - UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false) {} + UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false), + EntireContentsOfLargeArray(false) {} /// Adjust this printing policy for cases where it's known that we're /// printing C++ code (for instance, if AST dumping reaches a C++-only @@ -280,6 +281,10 @@ /// parameters. unsigned AlwaysIncludeTypeForTemplateArgument : 1; + /// Whether to print the entire array initializers, especially on non-type + /// template parameters, no matter how many elements there are. + unsigned EntireContentsOfLargeArray : 1; + /// Callbacks to use to allow the behavior of printing to be customized. const PrintingCallbacks *Callbacks = nullptr; }; diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h --- a/clang/include/clang/Basic/CharInfo.h +++ b/clang/include/clang/Basic/CharInfo.h @@ -38,15 +38,16 @@ }; } // end namespace charinfo -/// Returns true if this is an ASCII character. +/// Returns true if a byte is an ASCII character. LLVM_READNONE inline bool isASCII(char c) { return static_cast(c) <= 127; } LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; } -/// Returns true if this is an ASCII character. +/// Returns true if a codepoint is an ASCII character. LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; } +LLVM_READNONE inline bool isASCII(int64_t c) { return 0 <= c && c <= 127; } /// Returns true if this is a valid first character of a C identifier, /// which is [a-zA-Z_]. @@ -162,6 +163,44 @@ CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; } +enum class EscapeChar { + Single = 1, + Double = 2, + SingleAndDouble = static_cast(Single) | static_cast(Double), +}; + +/// Return C-style escaped string for special characters, or an empty string if +/// there is no such mapping. +template +LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef { + switch (Ch) { + case '\\': + return "\\\\"; + case '\'': + if ((static_cast(Opt) & static_cast(EscapeChar::Single)) == 0) + break; + return "\\'"; + case '"': + if ((static_cast(Opt) & static_cast(EscapeChar::Double)) == 0) + break; + return "\\\""; + case '\a': + return "\\a"; + case '\b': + return "\\b"; + case '\f': + return "\\f"; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\t': + return "\\t"; + case '\v': + return "\\v"; + } + return {}; +} /// Converts the given ASCII character to its lowercase equivalent. /// diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -625,6 +625,67 @@ return V.convertToDouble(); } +static bool TryPrintAsStringLiteral(raw_ostream &Out, + const PrintingPolicy &Policy, + const ArrayType *ATy, + ArrayRef Inits) { + if (Inits.empty()) + return false; + + QualType Ty = ATy->getElementType(); + if (!Ty->isAnyCharacterType()) + return false; + + // Nothing we can do about a sequence that is not null-terminated + if (!Inits.back().getInt().isZero()) + return false; + else + Inits = Inits.drop_back(); + + llvm::SmallString<40> Buf; + Buf.push_back('"'); + + // Better than printing a two-digit sequence of 10 integers. + constexpr size_t MaxN = 36; + StringRef Ellipsis; + if (Inits.size() > MaxN && !Policy.EntireContentsOfLargeArray) { + Ellipsis = "[...]"; + Inits = + Inits.take_front(std::min(MaxN - Ellipsis.size() / 2, Inits.size())); + } + + for (auto &Val : Inits) { + auto Char64 = Val.getInt().getExtValue(); + if (!isASCII(Char64)) + return false; // Bye bye, see you in integers. + auto Ch = static_cast(Char64); + // The diagnostic message is 'quoted' + auto Escaped = escapeCStyle(Ch); + if (Escaped.empty()) { + if (!isPrintable(Ch)) + return false; + Buf.emplace_back(Ch); + } else { + Buf.append(Escaped); + } + } + + Buf.append(Ellipsis); + Buf.push_back('"'); + + if (Ty->isWideCharType()) + Out << 'L'; + else if (Ty->isChar8Type()) + Out << "u8"; + else if (Ty->isChar16Type()) + Out << 'u'; + else if (Ty->isChar32Type()) + Out << 'U'; + + Out << Buf; + return true; +} + void APValue::printPretty(raw_ostream &Out, const ASTContext &Ctx, QualType Ty) const { printPretty(Out, Ctx.getPrintingPolicy(), Ty, &Ctx); @@ -795,17 +856,23 @@ } case APValue::Array: { const ArrayType *AT = Ty->castAsArrayTypeUnsafe(); + unsigned N = getArrayInitializedElts(); + if (N != 0 && TryPrintAsStringLiteral(Out, Policy, AT, + {&getArrayInitializedElt(0), N})) + return; QualType ElemTy = AT->getElementType(); Out << '{'; - if (unsigned N = getArrayInitializedElts()) { - getArrayInitializedElt(0).printPretty(Out, Policy, ElemTy, Ctx); - for (unsigned I = 1; I != N; ++I) { + unsigned I = 0; + switch (N) { + case 0: + for (; I != N; ++I) { Out << ", "; - if (I == 10) { - // Avoid printing out the entire contents of large arrays. - Out << "..."; - break; + if (I == 10 && !Policy.EntireContentsOfLargeArray) { + Out << "...}"; + return; } + LLVM_FALLTHROUGH; + default: getArrayInitializedElt(I).printPretty(Out, Policy, ElemTy, Ctx); } } diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -1507,12 +1507,20 @@ } void TemplateParamObjectDecl::printAsExpr(llvm::raw_ostream &OS) const { - const ASTContext &Ctx = getASTContext(); - getType().getUnqualifiedType().print(OS, Ctx.getPrintingPolicy()); - printAsInit(OS); + printAsExpr(OS, getASTContext().getPrintingPolicy()); +} + +void TemplateParamObjectDecl::printAsExpr(llvm::raw_ostream &OS, + const PrintingPolicy &Policy) const { + getType().getUnqualifiedType().print(OS, Policy); + printAsInit(OS, Policy); } void TemplateParamObjectDecl::printAsInit(llvm::raw_ostream &OS) const { - const ASTContext &Ctx = getASTContext(); - getValue().printPretty(OS, Ctx, getType()); + printAsInit(OS, getASTContext().getPrintingPolicy()); +} + +void TemplateParamObjectDecl::printAsInit(llvm::raw_ostream &OS, + const PrintingPolicy &Policy) const { + getValue().printPretty(OS, Policy, getType(), &getASTContext()); } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -960,40 +960,10 @@ break; } - switch (Val) { - case '\\': - OS << "'\\\\'"; - break; - case '\'': - OS << "'\\''"; - break; - case '\a': - // TODO: K&R: the meaning of '\\a' is different in traditional C - OS << "'\\a'"; - break; - case '\b': - OS << "'\\b'"; - break; - // Nonstandard escape sequence. - /*case '\e': - OS << "'\\e'"; - break;*/ - case '\f': - OS << "'\\f'"; - break; - case '\n': - OS << "'\\n'"; - break; - case '\r': - OS << "'\\r'"; - break; - case '\t': - OS << "'\\t'"; - break; - case '\v': - OS << "'\\v'"; - break; - default: + auto Escaped = escapeCStyle(Val); + if (!Escaped.empty()) { + OS << "'" << Escaped << "'"; + } else { // A character literal might be sign-extended, which // would result in an invalid \U escape sequence. // FIXME: multicharacter literals such as '\xFF\xFF\xFF\xFF' @@ -1163,8 +1133,9 @@ unsigned LastSlashX = getLength(); for (unsigned I = 0, N = getLength(); I != N; ++I) { - switch (uint32_t Char = getCodeUnit(I)) { - default: + uint32_t Char = getCodeUnit(I); + auto Escaped = escapeCStyle(Char); + if (Escaped.empty()) { // FIXME: Convert UTF-8 back to codepoints before rendering. // Convert UTF-16 surrogate pairs back to codepoints before rendering. @@ -1192,7 +1163,7 @@ for (/**/; Shift >= 0; Shift -= 4) OS << Hex[(Char >> Shift) & 15]; LastSlashX = I; - break; + continue; } if (Char > 0xffff) @@ -1205,7 +1176,7 @@ << Hex[(Char >> 8) & 15] << Hex[(Char >> 4) & 15] << Hex[(Char >> 0) & 15]; - break; + continue; } // If we used \x... for the previous character, and this character is a @@ -1230,17 +1201,9 @@ << (char)('0' + ((Char >> 6) & 7)) << (char)('0' + ((Char >> 3) & 7)) << (char)('0' + ((Char >> 0) & 7)); - break; - // Handle some common non-printable cases to make dumps prettier. - case '\\': OS << "\\\\"; break; - case '"': OS << "\\\""; break; - case '\a': OS << "\\a"; break; - case '\b': OS << "\\b"; break; - case '\f': OS << "\\f"; break; - case '\n': OS << "\\n"; break; - case '\r': OS << "\\r"; break; - case '\t': OS << "\\t"; break; - case '\v': OS << "\\v"; break; + } else { + // Handle some common non-printable cases to make dumps prettier. + OS << Escaped; } } OS << '"'; diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -434,7 +434,7 @@ NamedDecl *ND = getAsDecl(); if (getParamTypeForDecl()->isRecordType()) { if (auto *TPO = dyn_cast(ND)) { - TPO->printAsInit(Out); + TPO->printAsInit(Out, Policy); break; } } diff --git a/clang/test/SemaTemplate/temp_arg_string_printing.cpp b/clang/test/SemaTemplate/temp_arg_string_printing.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaTemplate/temp_arg_string_printing.cpp @@ -0,0 +1,141 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -ast-print %s | FileCheck %s + +using size_t = __SIZE_TYPE__; +static_assert(__has_builtin(__make_integer_seq)); + +template class idx_seq {}; +template using make_idx_seq = __make_integer_seq; + +template +struct Str { + constexpr Str(CharT const (&s)[N]) : Str(s, make_idx_seq()) {} + CharT value[N]; + +private: + template + constexpr Str(CharT const (&s)[N], idx_seq) : value{s[I]...} {} +}; + +template class ASCII {}; + +void not_string() { + // CHECK{LITERAL}: ASCII<{{9, -1, 42}}> + new ASCII<(int[]){9, -1, 42}>; + // CHECK{LITERAL}: ASCII<{{3.140000e+00, 0.000000e+00, 4.200000e+01}}> + new ASCII<(double[]){3.14, 0., 42.}>; +} + +void narrow() { + // CHECK{LITERAL}: ASCII<{""}> + new ASCII<"">; + // CHECK{LITERAL}: ASCII<{"the quick brown fox jumps"}> + new ASCII<"the quick brown fox jumps">; + // CHECK{LITERAL}: ASCII<{"OVER THE LAZY DOG 0123456789"}> + new ASCII<"OVER THE LAZY DOG 0123456789">; + // CHECK{LITERAL}: ASCII<{"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}> + new ASCII?/)">; + // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}> + new ASCII<"escape\0">; + // CHECK{LITERAL}: ASCII<{"escape\r\n"}> + new ASCII<"escape\r\n">; + // CHECK{LITERAL}: ASCII<{"escape\\\t\f\v"}> + new ASCII<"escape\\\t\f\v">; + // CHECK{LITERAL}: ASCII<{"escape\a\bc"}> + new ASCII<"escape\a\b\c">; + // CHECK{LITERAL}: ASCII<{{110, 111, 116, 17, 0}}> + new ASCII<"not\x11">; + // CHECK{LITERAL}: ASCII<{{18, 20, 127, 16, 1, 32, 97, 98, 99, 0}}> + new ASCII<"\x12\x14\x7f\x10\x01 abc">; + // CHECK{LITERAL}: ASCII<{{18, 20, 127, 16, 1, 32, 97, 98, 99, 100, ...}}> + new ASCII<"\x12\x14\x7f\x10\x01 abcd">; + // CHECK{LITERAL}: ASCII<{"print more characters as string"}> + new ASCII<"print more characters as string">; + // CHECK{LITERAL}: ASCII<{"print even more characters as string"}> + new ASCII<"print even more characters as string">; + // CHECK{LITERAL}: ASCII<{"print many characters no more than[...]"}> + new ASCII<"print many characters no more than a limit">; + // CHECK{LITERAL}: ASCII<{"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r"}> + new ASCII<"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r">; + // CHECK{LITERAL}: ASCII<{"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n[...]"}> + new ASCII<"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n">; +} + +void wide() { + // CHECK{LITERAL}: ASCII<{L""}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"the quick brown fox jumps"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"OVER THE LAZY DOG 0123456789"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}> + new ASCII?/)">; + // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"escape\r\n"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"escape\\\t\f\v"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"escape\a\bc"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{{110, 111, 116, 17, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{{18, 20, 255, 22909, 136, 32, 97, 98, 99, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{{18, 20, 255, 22909, 136, 32, 97, 98, 99, 100, ...}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"print more characters as string"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"print even more characters as string"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"print many characters no more than[...]"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{L"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n[...]"}> + new ASCII; +} + +void utf8() { + // CHECK{LITERAL}: ASCII<{u8""}> + new ASCII; + // CHECK{LITERAL}: ASCII<{u8"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}> + new ASCII?/)">; + // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{u8"escape\r\n"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{{229, 165, 189, 239, 191, 189, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{u8"print many characters no more than[...]"}> + new ASCII; +} + +void utf16() { + // CHECK{LITERAL}: ASCII<{u""}> + new ASCII; + // CHECK{LITERAL}: ASCII<{u"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}> + new ASCII?/)">; + // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{u"escape\r\n"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{{22909, 65533, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{u"print many characters no more than[...]"}> + new ASCII; +} + +void utf32() { + // CHECK{LITERAL}: ASCII<{U""}> + new ASCII; + // CHECK{LITERAL}: ASCII<{U"\\`~!@#$%^&*()_+-={}[]|\'\";:,.<>?/"}> + new ASCII?/)">; + // CHECK{LITERAL}: ASCII<{{101, 115, 99, 97, 112, 101, 0, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{U"escape\r\n"}> + new ASCII; + // CHECK{LITERAL}: ASCII<{{22909, 131358, 0}}> + new ASCII; + // CHECK{LITERAL}: ASCII<{U"print many characters no more than[...]"}> + new ASCII; +} diff --git a/clang/unittests/AST/TypePrinterTest.cpp b/clang/unittests/AST/TypePrinterTest.cpp --- a/clang/unittests/AST/TypePrinterTest.cpp +++ b/clang/unittests/AST/TypePrinterTest.cpp @@ -62,4 +62,35 @@ ASSERT_TRUE(PrintedTypeMatches( Code, {}, Matcher, "const N::Type &", [](PrintingPolicy &Policy) { Policy.FullyQualifiedName = true; })); -} \ No newline at end of file +} + +TEST(TypePrinter, TemplateIdWithNTTP) { + constexpr char Code[] = R"cpp( + template + struct Str { + constexpr Str(char const (&s)[N]) { __builtin_memcpy(value, s, N); } + char value[N]; + }; + template class ASCII {}; + + ASCII<"this nontype template argument is too long to print"> x; + )cpp"; + auto Matcher = classTemplateSpecializationDecl( + hasName("ASCII"), has(cxxConstructorDecl( + isMoveConstructor(), + has(parmVarDecl(hasType(qualType().bind("id"))))))); + + ASSERT_TRUE(PrintedTypeMatches( + Code, {"-std=c++20"}, Matcher, + R"(ASCII<{"this nontype template argument is [...]"}> &&)", + [](PrintingPolicy &Policy) { + Policy.EntireContentsOfLargeArray = false; + })); + + ASSERT_TRUE(PrintedTypeMatches( + Code, {"-std=c++20"}, Matcher, + R"(ASCII<{"this nontype template argument is too long to print"}> &&)", + [](PrintingPolicy &Policy) { + Policy.EntireContentsOfLargeArray = true; + })); +}