diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp @@ -7,9 +7,6 @@ static_assert(sizeof(a) <= 10, ""); // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when the string literal is an empty string [modernize-unary-static-assert] // CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 10 );{{$}} - static_assert(sizeof(a) <= 12, L""); - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when - // CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 12 );{{$}} FOO // CHECK-FIXES: {{^}} FOO{{$}} static_assert(sizeof(a) <= 17, MSG); diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -1804,7 +1804,7 @@ /// * An array of getByteLength() char used to store the string data. public: - enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32 }; + enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32, Unevaluated }; private: unsigned numTrailingObjects(OverloadToken) const { return 1; } @@ -1866,7 +1866,7 @@ unsigned CharByteWidth); StringRef getString() const { - assert(getCharByteWidth() == 1 && + assert((isUnevaluated() || getCharByteWidth() == 1) && "This function is used in places that assume strings use char"); return StringRef(getStrDataAsChar(), getByteLength()); } @@ -1906,6 +1906,7 @@ bool isUTF8() const { return getKind() == UTF8; } bool isUTF16() const { return getKind() == UTF16; } bool isUTF32() const { return getKind() == UTF32; } + bool isUnevaluated() const { return getKind() == Unevaluated; } bool isPascal() const { return StringLiteralBits.IsPascal; } bool containsNonAscii() const { diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1408,6 +1408,7 @@ StringArgument<"Replacement", 1>]; let MeaningfulToClassTemplateDefinition = 1; let Documentation = [DeprecatedDocs]; + let ParseArgumentsAsUnevaluated = 1; } def Destructor : InheritableAttr { @@ -3044,6 +3045,7 @@ return this->getSemanticSpelling() == CXX11_nodiscard; } }]; + let ParseArgumentsAsUnevaluated = 1; } def Weak : InheritableAttr { diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -56,7 +56,9 @@ "%select{in %1|for diagnostic message in static_assert|" "for optional message in 'availability' attribute|" "for %select{language name|source container name|USR}1 in " - "'external_source_symbol' attribute}0">; + "'external_source_symbol' attribute|" + "as argument of '%1' attribute}0">; + def err_invalid_string_udl : Error< "string literal with user-defined suffix cannot be used here">; def err_invalid_character_udl : Error< diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -276,6 +276,13 @@ "identifier">, InGroup; def err_unsupported_string_concat : Error< "unsupported non-standard concatenation of string literals">; + +def err_unevaluated_string_prefix : Error< + "an unevaluated string literal cannot have an encoding prefix">; +def err_unevaluated_string_udl : Error< + "an unevaluated string literal cannot be a user-defined literal">; +def err_unevaluated_string_invalid_escape_sequence : Error< + "invalid escape sequence '%0' in an unevaluated string literal">; def err_string_concat_mixed_suffix : Error< "differing user-defined suffixes ('%0' and '%1') in string literal " "concatenation">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -433,9 +433,6 @@ "ISO C requires a named parameter before '...'">; def err_declarator_need_ident : Error<"declarator requires an identifier">; def err_language_linkage_spec_unknown : Error<"unknown linkage language">; -def err_language_linkage_spec_not_ascii : Error< - "string literal in language linkage specifier cannot have an " - "encoding-prefix">; def ext_use_out_of_scope_declaration : ExtWarn< "use of out-of-scope declaration of %0%select{| whose type is not " "compatible with that of an implicit declaration}1">, diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h --- a/clang/include/clang/Lex/LiteralSupport.h +++ b/clang/include/clang/Lex/LiteralSupport.h @@ -212,6 +212,11 @@ } }; +enum class StringLiteralEvalMethod { + Evaluated, + Unevaluated, +}; + /// StringLiteralParser - This decodes string escape characters and performs /// wide string analysis and Translation Phase #6 (concatenation of string /// literals) (C99 5.1.1.2p1). @@ -230,20 +235,23 @@ SmallString<32> UDSuffixBuf; unsigned UDSuffixToken; unsigned UDSuffixOffset; + StringLiteralEvalMethod EvalMethod; + public: - StringLiteralParser(ArrayRef StringToks, - Preprocessor &PP); - StringLiteralParser(ArrayRef StringToks, - const SourceManager &sm, const LangOptions &features, - const TargetInfo &target, + StringLiteralParser(ArrayRef StringToks, Preprocessor &PP, + StringLiteralEvalMethod StringMethod = + StringLiteralEvalMethod::Evaluated); + StringLiteralParser(ArrayRef StringToks, const SourceManager &sm, + const LangOptions &features, const TargetInfo &target, DiagnosticsEngine *diags = nullptr) - : SM(sm), Features(features), Target(target), Diags(diags), - MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), - ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { + : SM(sm), Features(features), Target(target), Diags(diags), + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), + ResultPtr(ResultBuf.data()), + EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false), + Pascal(false) { init(StringToks); } - bool hadError; bool Pascal; @@ -269,6 +277,9 @@ bool isUTF16() const { return Kind == tok::utf16_string_literal; } bool isUTF32() const { return Kind == tok::utf32_string_literal; } bool isPascal() const { return Pascal; } + bool isUnevaluated() const { + return EvalMethod == StringLiteralEvalMethod::Unevaluated; + } StringRef getUDSuffix() const { return UDSuffixBuf; } diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1788,8 +1788,12 @@ bool IsUnevaluated); ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false); + ExprResult ParseUnevaluatedStringLiteralExpression(); private: + ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated); + ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc); ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc); @@ -2790,6 +2794,13 @@ /// clang accepts as an extension. void DiagnoseCXX11AttributeExtension(ParsedAttributes &Attrs); + ExprResult ParseUnevaluatedStringInAttribute(const IdentifierInfo &AttrName); + + bool + ParseAttributeArgumentList(const clang::IdentifierInfo &AttrName, + SmallVectorImpl &Exprs, + ParsedAttributeArgumentsProperties ArgsProperties); + /// Parses syntax-generic attribute arguments for attributes which are /// known to the implementation, and adds them to the given ParsedAttributes /// list with the given attribute syntax. Returns the number of arguments diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/VersionTuple.h" +#include #include #include #include @@ -911,6 +912,15 @@ VecTy AttrList; }; +struct ParsedAttributeArgumentsProperties { + ParsedAttributeArgumentsProperties(uint32_t StringLiteralBits) + : StringLiterals(StringLiteralBits) {} + bool isStringLiteralArg(unsigned I) const { return StringLiterals.test(I); } + +private: + std::bitset<32> StringLiterals; +}; + /// ParsedAttributes - A collection of parsed attributes. Currently /// we don't differentiate between the various attribute syntaxes, /// which is basically silly. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -5695,6 +5695,8 @@ ExprResult ActOnStringLiteral(ArrayRef StringToks, Scope *UDLScope = nullptr); + ExprResult ActOnUnevaluatedStringLiteral(ArrayRef StringToks); + /// ControllingExprOrType is either an opaque pointer coming out of a /// ParsedType or an Expr *. FIXME: it'd be better to split this interface /// into two so we don't take a void *, but that's awkward because one of diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1136,6 +1136,8 @@ case UTF32: CharByteWidth = Target.getChar32Width(); break; + case Unevaluated: + return sizeof(char); // Host; } assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple"); CharByteWidth /= 8; @@ -1149,35 +1151,45 @@ const SourceLocation *Loc, unsigned NumConcatenated) : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) { - assert(Ctx.getAsConstantArrayType(Ty) && - "StringLiteral must be of constant array type!"); - unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind); - unsigned ByteLength = Str.size(); - assert((ByteLength % CharByteWidth == 0) && - "The size of the data must be a multiple of CharByteWidth!"); - - // Avoid the expensive division. The compiler should be able to figure it - // out by itself. However as of clang 7, even with the appropriate - // llvm_unreachable added just here, it is not able to do so. - unsigned Length; - switch (CharByteWidth) { - case 1: - Length = ByteLength; - break; - case 2: - Length = ByteLength / 2; - break; - case 4: - Length = ByteLength / 4; - break; - default: - llvm_unreachable("Unsupported character width!"); - } + + unsigned Length = Str.size(); StringLiteralBits.Kind = Kind; - StringLiteralBits.CharByteWidth = CharByteWidth; - StringLiteralBits.IsPascal = Pascal; StringLiteralBits.NumConcatenated = NumConcatenated; + + if (Kind != StringKind::Unevaluated) { + assert(Ctx.getAsConstantArrayType(Ty) && + "StringLiteral must be of constant array type!"); + unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind); + unsigned ByteLength = Str.size(); + assert((ByteLength % CharByteWidth == 0) && + "The size of the data must be a multiple of CharByteWidth!"); + + // Avoid the expensive division. The compiler should be able to figure it + // out by itself. However as of clang 7, even with the appropriate + // llvm_unreachable added just here, it is not able to do so. + switch (CharByteWidth) { + case 1: + Length = ByteLength; + break; + case 2: + Length = ByteLength / 2; + break; + case 4: + Length = ByteLength / 4; + break; + default: + llvm_unreachable("Unsupported character width!"); + } + + StringLiteralBits.CharByteWidth = CharByteWidth; + StringLiteralBits.IsPascal = Pascal; + } else { + assert(!Pascal && "Can't make an unevaluated Pascal string"); + StringLiteralBits.CharByteWidth = 1; + StringLiteralBits.IsPascal = false; + } + *getTrailingObjects() = Length; // Initialize the trailing array of SourceLocation. @@ -1186,7 +1198,7 @@ NumConcatenated * sizeof(SourceLocation)); // Initialize the trailing array of char holding the string data. - std::memcpy(getTrailingObjects(), Str.data(), ByteLength); + std::memcpy(getTrailingObjects(), Str.data(), Str.size()); setDependence(ExprDependence::None); } @@ -1223,6 +1235,7 @@ void StringLiteral::outputString(raw_ostream &OS) const { switch (getKind()) { + case Unevaluated: case Ordinary: break; // no prefix. case Wide: OS << 'L'; break; @@ -1333,7 +1346,8 @@ const TargetInfo &Target, unsigned *StartToken, unsigned *StartTokenByteOffset) const { assert((getKind() == StringLiteral::Ordinary || - getKind() == StringLiteral::UTF8) && + getKind() == StringLiteral::UTF8 || + getKind() == StringLiteral::Unevaluated) && "Only narrow string literals are currently supported"); // Loop over all of the tokens in this string until we find the one that diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -87,6 +87,24 @@ MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd); } +static bool IsEscapeValidInUnevaluatedStringLiteral(char Escape) { + switch (Escape) { + case '\'': + case '"': + case '?': + case '\\': + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + return true; + } + return false; +} + /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in /// either a character or a string literal. static unsigned ProcessCharEscape(const char *ThisTokBegin, @@ -94,7 +112,8 @@ const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, - const LangOptions &Features) { + const LangOptions &Features, + StringLiteralEvalMethod EvalMethod) { const char *EscapeBegin = ThisTokBuf; bool Delimited = false; bool EndDelimiterFound = false; @@ -105,6 +124,7 @@ // We know that this character can't be off the end of the buffer, because // that would have been \", which would not have been the end of string. unsigned ResultChar = *ThisTokBuf++; + char Escape = ResultChar; switch (ResultChar) { // These map to themselves. case '\\': case '\'': case '"': case '?': break; @@ -318,6 +338,12 @@ } } + if (EvalMethod == StringLiteralEvalMethod::Unevaluated && + !IsEscapeValidInUnevaluatedStringLiteral(Escape)) { + Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, + diag::err_unevaluated_string_invalid_escape_sequence) + << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin); + } return ResultChar; } @@ -1727,9 +1753,10 @@ } unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo()); uint64_t result = - ProcessCharEscape(TokBegin, begin, end, HadError, - FullSourceLoc(Loc,PP.getSourceManager()), - CharWidth, &PP.getDiagnostics(), PP.getLangOpts()); + ProcessCharEscape(TokBegin, begin, end, HadError, + FullSourceLoc(Loc, PP.getSourceManager()), CharWidth, + &PP.getDiagnostics(), PP.getLangOpts(), + StringLiteralEvalMethod::Evaluated); *buffer_begin++ = result; } @@ -1837,13 +1864,14 @@ /// hex-digit hex-digit hex-digit hex-digit /// \endverbatim /// -StringLiteralParser:: -StringLiteralParser(ArrayRef StringToks, - Preprocessor &PP) - : SM(PP.getSourceManager()), Features(PP.getLangOpts()), - Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()), - MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), - ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { +StringLiteralParser::StringLiteralParser(ArrayRef StringToks, + Preprocessor &PP, + StringLiteralEvalMethod EvalMethod) + : SM(PP.getSourceManager()), Features(PP.getLangOpts()), + Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()), + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), + ResultPtr(ResultBuf.data()), EvalMethod(EvalMethod), hadError(false), + Pascal(false) { init(StringToks); } @@ -1860,12 +1888,12 @@ assert(!StringToks.empty() && "expected at least one token"); MaxTokenLength = StringToks[0].getLength(); assert(StringToks[0].getLength() >= 2 && "literal token is invalid!"); - SizeBound = StringToks[0].getLength()-2; // -2 for "". - Kind = StringToks[0].getKind(); - + SizeBound = StringToks[0].getLength() - 2; // -2 for "". hadError = false; - // Implement Translation Phase #6: concatenation of string literals + // Determines the kind of string from the prefix + Kind = tok::string_literal; + /// (C99 5.1.1.2p1). The common case is only one string fragment. for (const Token &Tok : StringToks) { if (Tok.getLength() < 2) @@ -1882,7 +1910,11 @@ // Remember if we see any wide or utf-8/16/32 strings. // Also check for illegal concatenations. - if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) { + if (isUnevaluated() && Tok.getKind() != tok::string_literal) { + if (Diags) + Diags->Report(Tok.getLocation(), diag::err_unevaluated_string_prefix); + hadError = true; + } else if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) { if (isOrdinary()) { Kind = Tok.getKind(); } else { @@ -1965,13 +1997,18 @@ // result of a concatenation involving at least one user-defined-string- // literal, all the participating user-defined-string-literals shall // have the same ud-suffix. - if (UDSuffixBuf != UDSuffix) { + bool UnevaluatedStringHasUDL = isUnevaluated() && !UDSuffix.empty(); + if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) { if (Diags) { SourceLocation TokLoc = StringToks[i].getLocation(); - Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) - << UDSuffixBuf << UDSuffix - << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) - << SourceRange(TokLoc, TokLoc); + if (UnevaluatedStringHasUDL) { + Diags->Report(TokLoc, diag::err_unevaluated_string_udl) + << SourceRange(TokLoc, TokLoc); + } else { + Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc); + } } hadError = true; } @@ -2043,8 +2080,9 @@ ++ThisTokBuf; // skip " // Check if this is a pascal string - if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd && - ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { + if (!isUnevaluated() && Features.PascalStrings && + ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' && + ThisTokBuf[1] == 'p') { // If the \p sequence is found in the first token, we have a pascal string // Otherwise, if we already have a pascal string, ignore the first \p @@ -2080,9 +2118,9 @@ } // Otherwise, this is a non-UCN escape character. Process it. unsigned ResultChar = - ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, - FullSourceLoc(StringToks[i].getLocation(), SM), - CharByteWidth*8, Diags, Features); + ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, + FullSourceLoc(StringToks[i].getLocation(), SM), + CharByteWidth * 8, Diags, Features, EvalMethod); if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -2104,6 +2142,8 @@ } } + assert((!Pascal || !isUnevaluated()) && + "Pascal string in unevaluated context"); if (Pascal) { if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -2277,8 +2317,8 @@ ByteNo -= Len; } else { ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError, - FullSourceLoc(Tok.getLocation(), SM), - CharByteWidth*8, Diags, Features); + FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8, + Diags, Features, StringLiteralEvalMethod::Evaluated); --ByteNo; } assert(!HadError && "This method isn't valid on erroneous strings"); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1869,7 +1869,8 @@ if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) Tok.setKind(tok::identifier); else if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) { - StringLiteralParser Literal(Tok, *this); + StringLiteralParser Literal(Tok, *this, + StringLiteralEvalMethod::Unevaluated); if (Literal.hadError) return; diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -1088,7 +1088,8 @@ if (DiagName.is(tok::eod)) PP.getDiagnostics().dump(); else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) { - StringLiteralParser Literal(DiagName, PP); + StringLiteralParser Literal(DiagName, PP, + StringLiteralEvalMethod::Unevaluated); if (Literal.hadError) return; PP.getDiagnostics().dump(Literal.GetString()); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -288,6 +288,16 @@ #undef CLANG_ATTR_IDENTIFIER_ARG_LIST } +/// Determine whether the given attribute has an identifier argument. +static ParsedAttributeArgumentsProperties +attributeStringLiteralListArg(const IdentifierInfo &II) { +#define CLANG_ATTR_STRING_LITERAL_ARG_LIST + return llvm::StringSwitch(normalizeAttrName(II.getName())) +#include "clang/Parse/AttrParserStringSwitches.inc" + .Default(0); +#undef CLANG_ATTR_STRING_LITERAL_ARG_LIST +} + /// Determine whether the given attribute has a variadic identifier argument. static bool attributeHasVariadicIdentifierArg(const IdentifierInfo &II) { #define CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST @@ -371,6 +381,79 @@ ScopeName, ScopeLoc, nullptr, 0, Form); } +ExprResult +Parser::ParseUnevaluatedStringInAttribute(const IdentifierInfo &AttrName) { + if (Tok.is(tok::l_paren)) { + BalancedDelimiterTracker Paren(*this, tok::l_paren); + Paren.consumeOpen(); + ExprResult Res = ParseUnevaluatedStringInAttribute(AttrName); + Paren.consumeClose(); + return Res; + } + if (!isTokenStringLiteral()) { + Diag(Tok.getLocation(), diag::err_expected_string_literal) + << /*in attribute...*/ 4 << AttrName.getName(); + return ExprError(); + } + return ParseUnevaluatedStringLiteralExpression(); +} + +bool Parser::ParseAttributeArgumentList( + const IdentifierInfo &AttrName, SmallVectorImpl &Exprs, + ParsedAttributeArgumentsProperties ArgsProperties) { + bool SawError = false; + unsigned Arg = 0; + while (true) { + ExprResult Expr; + if (ArgsProperties.isStringLiteralArg(Arg)) { + Expr = ParseUnevaluatedStringInAttribute(AttrName); + } else if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { + Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); + Expr = ParseBraceInitializer(); + } else { + Expr = ParseAssignmentExpression(); + } + Expr = Actions.CorrectDelayedTyposInExpr(Expr); + + if (Tok.is(tok::ellipsis)) + Expr = Actions.ActOnPackExpansion(Expr.get(), ConsumeToken()); + else if (Tok.is(tok::code_completion)) { + // There's nothing to suggest in here as we parsed a full expression. + // Instead fail and propogate the error since caller might have something + // the suggest, e.g. signature help in function call. Note that this is + // performed before pushing the \p Expr, so that signature help can report + // current argument correctly. + SawError = true; + cutOffParsing(); + break; + } + if (Expr.isInvalid()) { + SawError = true; + break; + SkipUntil(tok::comma, tok::r_paren, StopBeforeMatch); + } else { + Exprs.push_back(Expr.get()); + } + if (Tok.isNot(tok::comma)) + break; + // Move to the next argument, remember where the comma was. + Token Comma = Tok; + ConsumeToken(); + checkPotentialAngleBracketDelimiter(Comma); + Arg++; + } + if (SawError) { + // Ensure typos get diagnosed when errors were encountered while parsing the + // expression list. + for (auto &E : Exprs) { + ExprResult Expr = Actions.CorrectDelayedTyposInExpr(E); + if (Expr.isUsable()) + E = Expr.get(); + } + } + return SawError; +} + unsigned Parser::ParseAttributeArgsCommon( IdentifierInfo *AttrName, SourceLocation AttrNameLoc, ParsedAttributes &Attrs, SourceLocation *EndLoc, IdentifierInfo *ScopeName, @@ -463,9 +546,9 @@ : Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprVector ParsedExprs; - if (ParseExpressionList(ParsedExprs, llvm::function_ref(), - /*FailImmediatelyOnInvalidExpr=*/true, - /*EarlyTypoCorrection=*/true)) { + ParsedAttributeArgumentsProperties ArgProperties = + attributeStringLiteralListArg(*AttrName); + if (ParseAttributeArgumentList(*AttrName, ParsedExprs, ArgProperties)) { SkipUntil(tok::r_paren, StopAtSemi); return 0; } diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -350,7 +350,7 @@ /// Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { assert(isTokenStringLiteral() && "Not a string literal!"); - ExprResult Lang = ParseStringLiteralExpression(false); + ExprResult Lang = ParseUnevaluatedStringLiteralExpression(); ParseScope LinkageScope(this, Scope::DeclScope); Decl *LinkageSpec = @@ -1023,7 +1023,7 @@ return nullptr; } - AssertMessage = ParseStringLiteralExpression(); + AssertMessage = ParseUnevaluatedStringLiteralExpression(); if (AssertMessage.isInvalid()) { SkipMalformedDecl(); return nullptr; diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3256,6 +3256,17 @@ /// string-literal /// \verbatim ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) { + return ParseStringLiteralExpression(AllowUserDefinedLiteral, + /*Unevaluated=*/false); +} + +ExprResult Parser::ParseUnevaluatedStringLiteralExpression() { + return ParseStringLiteralExpression(/*AllowUserDefinedLiteral=*/false, + /*Unevaluated=*/true); +} + +ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated) { assert(isTokenStringLiteral() && "Not a string literal!"); // String concat. Note that keywords like __func__ and __FUNCTION__ are not @@ -3267,6 +3278,11 @@ ConsumeStringToken(); } while (isTokenStringLiteral()); + if (Unevaluated) { + assert(!AllowUserDefinedLiteral && "UDL are always evaluated"); + return Actions.ActOnUnevaluatedStringLiteral(StringToks); + } + // Pass the set of string tokens, ready for concatenation, to the actions. return Actions.ActOnStringLiteral(StringToks, AllowUserDefinedLiteral ? getCurScope() @@ -3478,9 +3494,9 @@ if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); Expr = ParseBraceInitializer(); - } else + } else { Expr = ParseAssignmentExpression(); - + } if (EarlyTypoCorrection) Expr = Actions.CorrectDelayedTyposInExpr(Expr); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -349,7 +349,7 @@ if (ArgLocation) *ArgLocation = E->getBeginLoc(); - if (!Literal || !Literal->isOrdinary()) { + if (!Literal || (!Literal->isUnevaluated() && !Literal->isOrdinary())) { Diag(E->getBeginLoc(), diag::err_attribute_argument_type) << CI << AANT_ArgumentString; return false; @@ -381,6 +381,16 @@ // Now check for an actual string literal. Expr *ArgExpr = AL.getArgAsExpr(ArgNum); + const auto *Literal = dyn_cast(ArgExpr->IgnoreParenCasts()); + if (ArgLocation) + *ArgLocation = ArgExpr->getBeginLoc(); + + if (!Literal || (!Literal->isUnevaluated() && !Literal->isOrdinary())) { + Diag(ArgExpr->getBeginLoc(), diag::err_attribute_argument_type) + << AL << AANT_ArgumentString; + return false; + } + Str = Literal->getString(); return checkStringLiteralArgumentAttr(AL, ArgExpr, Str, ArgLocation); } @@ -624,7 +634,7 @@ if (const auto *StrLit = dyn_cast(ArgExp)) { if (StrLit->getLength() == 0 || - (StrLit->isOrdinary() && StrLit->getString() == StringRef("*"))) { + (StrLit->isUnevaluated() && StrLit->getString() == StringRef("*"))) { // Pass empty strings to the analyzer without warnings. // Treat "*" as the universal lock. Args.push_back(ArgExp); @@ -865,7 +875,8 @@ if (!AL.checkAtLeastNumArgs(S, 1)) return false; - if (!isIntOrBool(AL.getArgAsExpr(0))) { + Expr *First = AL.getArgAsExpr(0); + if (!isIntOrBool(First)) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) << AL << 1 << AANT_ArgumentIntOrBool; return false; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -16467,11 +16467,7 @@ Expr *LangStr, SourceLocation LBraceLoc) { StringLiteral *Lit = cast(LangStr); - if (!Lit->isOrdinary()) { - Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii) - << LangStr->getSourceRange(); - return nullptr; - } + assert(Lit->isUnevaluated() && "Unexpected string literal kind"); StringRef Lang = Lit->getString(); LinkageSpecDecl::LanguageIDs Language; @@ -16936,10 +16932,7 @@ llvm::raw_svector_ostream Msg(MsgBuffer); if (AssertMessage) { const auto *MsgStr = cast(AssertMessage); - if (MsgStr->isOrdinary()) - Msg << MsgStr->getString(); - else - MsgStr->printPretty(Msg, nullptr, getPrintingPolicy()); + Msg << MsgStr->getString(); } Expr *InnerCond = nullptr; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1911,6 +1911,30 @@ return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } +ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef StringToks) { + StringLiteralParser Literal(StringToks, PP, + StringLiteralEvalMethod::Unevaluated); + if (Literal.hadError) + return ExprError(); + + SmallVector StringTokLocs; + for (const Token &Tok : StringToks) + StringTokLocs.push_back(Tok.getLocation()); + + StringLiteral *Lit = StringLiteral::Create( + Context, Literal.GetString(), StringLiteral::Unevaluated, false, {}, + &StringTokLocs[0], StringTokLocs.size()); + + if (!Literal.getUDSuffix().empty()) { + SourceLocation UDSuffixLoc = + getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], + Literal.getUDSuffixOffset()); + return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); + } + + return Lit; +} + /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4086,6 +4086,9 @@ case StringLiteral::Wide: return Context.typesAreCompatible(Context.getWideCharType(), QualType(ToPointeeType, 0)); + case StringLiteral::Unevaluated: + assert(false && "Unevaluated string literal in expression"); + break; } } } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -140,6 +140,9 @@ if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; return SIF_Other; + case StringLiteral::Unevaluated: + assert(false && "Unevaluated string literal in initialization"); + break; } llvm_unreachable("missed a StringLiteral kind?"); diff --git a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp --- a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp @@ -8,7 +8,7 @@ extern "C" plusplus { } -extern u8"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern L"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern u"C++" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern U"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} +extern u8"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern L"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern u"C++" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern U"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} diff --git a/clang/test/CXX/dcl.dcl/p4-0x.cpp b/clang/test/CXX/dcl.dcl/p4-0x.cpp --- a/clang/test/CXX/dcl.dcl/p4-0x.cpp +++ b/clang/test/CXX/dcl.dcl/p4-0x.cpp @@ -18,4 +18,7 @@ static_assert(T(), ""); static_assert(U(), ""); // expected-error {{ambiguous}} -static_assert(false, L"\x14hi" "!" R"x(")x"); // expected-error {{static assertion failed: L"\024hi!\""}} +static_assert(false, L"\x14hi" // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\x14' in an unevaluated string literal}} + "!" + R"x(")x"); diff --git a/clang/test/FixIt/fixit-static-assert.cpp b/clang/test/FixIt/fixit-static-assert.cpp --- a/clang/test/FixIt/fixit-static-assert.cpp +++ b/clang/test/FixIt/fixit-static-assert.cpp @@ -11,8 +11,6 @@ // String literal prefixes are good. static_assert(true && R"(RawString)"); // CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," -static_assert(true && L"RawString"); -// CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," static_assert(true); // CHECK-DAG: {[[@LINE-1]]:19-[[@LINE-1]]:19}:", \"\"" diff --git a/clang/test/Parser/cxx-attributes.cpp b/clang/test/Parser/cxx-attributes.cpp --- a/clang/test/Parser/cxx-attributes.cpp +++ b/clang/test/Parser/cxx-attributes.cpp @@ -41,7 +41,7 @@ pi = &i[0]; } -[[deprecated([""])]] int WrongArgs; // expected-error {{expected variable name or 'this' in lambda capture list}} +[[deprecated([""])]] int WrongArgs; // expected-error {{expected string literal as argument of 'deprecated' attribute}} [[,,,,,]] int Commas1; // ok [[,, maybe_unused]] int Commas2; // ok [[maybe_unused,,,]] int Commas3; // ok diff --git a/clang/test/Parser/cxx0x-attributes.cpp b/clang/test/Parser/cxx0x-attributes.cpp --- a/clang/test/Parser/cxx0x-attributes.cpp +++ b/clang/test/Parser/cxx0x-attributes.cpp @@ -445,3 +445,8 @@ ) { } }; + +namespace P2361 { +[[deprecated(L"abc")]] void a(); // expected-error{{an unevaluated string literal cannot have an encoding prefix}} +[[nodiscard("\123")]] int b(); // expected-error{{invalid escape sequence '\123' in an unevaluated string literal}} +} diff --git a/clang/test/Sema/MicrosoftExtensions.c b/clang/test/Sema/MicrosoftExtensions.c --- a/clang/test/Sema/MicrosoftExtensions.c +++ b/clang/test/Sema/MicrosoftExtensions.c @@ -123,7 +123,7 @@ #define MY_TEXT "This is also deprecated" __declspec(deprecated(MY_TEXT)) void Dfunc1( void ) {} // expected-note {{'Dfunc1' has been explicitly marked deprecated here}} -struct __declspec(deprecated(123)) DS2 {}; // expected-error {{'deprecated' attribute requires a string}} +struct __declspec(deprecated(123)) DS2 {}; // expected-error {{expected string literal as argument of 'deprecated' attribute}} void test( void ) { e1 = one; // expected-warning {{'e1' is deprecated: This is deprecated}} diff --git a/clang/test/Sema/attr-btf_type_tag.c b/clang/test/Sema/attr-btf_type_tag.c --- a/clang/test/Sema/attr-btf_type_tag.c +++ b/clang/test/Sema/attr-btf_type_tag.c @@ -8,7 +8,7 @@ #define __tag6 __attribute__((btf_type_tag("tag6"))) int __attribute__((btf_type_tag("tag1", "tag2"))) *invalid1; // expected-error {{'btf_type_tag' attribute takes one argument}} -int __attribute__((btf_type_tag(2))) *invalid2; // expected-error {{'btf_type_tag' attribute requires a string}} +int __attribute__((btf_type_tag(2))) *invalid2; // expected-error {{expected string literal as argument of 'btf_type_tag' attribute}} int * __tag1 __tag2 * __tag3 __tag4 * __tag5 __tag6 *g; diff --git a/clang/test/Sema/attr-capabilities.c b/clang/test/Sema/attr-capabilities.c --- a/clang/test/Sema/attr-capabilities.c +++ b/clang/test/Sema/attr-capabilities.c @@ -17,8 +17,8 @@ int Test4 __attribute__((try_acquire_capability("test4"))); // expected-error {{'try_acquire_capability' attribute only applies to functions}} int Test5 __attribute__((release_capability("test5"))); // expected-warning {{'release_capability' attribute only applies to functions}} -struct __attribute__((capability(12))) Test3 {}; // expected-error {{'capability' attribute requires a string}} -struct __attribute__((shared_capability(Test2))) Test4 {}; // expected-error {{'shared_capability' attribute requires a string}} +struct __attribute__((capability(12))) Test3 {}; // expected-error {{expected string literal as argument of 'capability' attribute}} +struct __attribute__((shared_capability(Test2))) Test4 {}; // expected-error {{expected string literal as argument of 'shared_capability' attribute}} struct __attribute__((capability)) Test5 {}; // expected-error {{'capability' attribute takes one argument}} struct __attribute__((shared_capability("test1", 12))) Test6 {}; // expected-error {{'shared_capability' attribute takes one argument}} diff --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp --- a/clang/test/SemaCXX/static-assert.cpp +++ b/clang/test/SemaCXX/static-assert.cpp @@ -29,13 +29,23 @@ S s1; // expected-note {{in instantiation of template class 'S' requested here}} S s2; -static_assert(false, L"\xFFFFFFFF"); // expected-error {{static assertion failed: L"\xFFFFFFFF"}} -static_assert(false, u"\U000317FF"); // expected-error {{static assertion failed: u"\U000317FF"}} - -static_assert(false, u8"Ω"); // expected-error {{static assertion failed: u8"\316\251"}} -static_assert(false, L"\u1234"); // expected-error {{static assertion failed: L"\x1234"}} -static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static assertion failed: L"\x1FF""0\x123""fx\xFFFFFgoop"}} - +static_assert(false, L"\xFFFFFFFF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\xFFFFFFFF' in an unevaluated string literal}} +static_assert(false, u"\U000317FF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +// FIXME: render this as u8"\u03A9" +static_assert(false, u8"Ω"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +static_assert(false, L"\u1234"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +static_assert(false, L"\x1ff" // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\x1ff' in an unevaluated string literal}} + "0\x123" // expected-error {{invalid escape sequence '\x123' in an unevaluated string literal}} + "fx\xfffff" // expected-error {{invalid escape sequence '\xfffff' in an unevaluated string literal}} + "goop"); + +static_assert(false, "\'\"\?\\\a\b\f\n\r\t\v"); // expected-error {{'"?\}} +static_assert(true, "\xFF"); // expected-error {{invalid escape sequence '\xFF' in an unevaluated string literal}} +static_assert(true, "\123"); // expected-error {{invalid escape sequence '\123' in an unevaluated string literal}} +static_assert(true, "\pOh no, a Pascal string!"); // expected-warning {{unknown escape sequence '\p'}} \ + // expected-error {{invalid escape sequence '\p' in an unevaluated string literal}} static_assert(false, R"(a \tb c diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -2280,6 +2280,14 @@ .Default(false); } +static bool isStringLiteralArgument(const Record *Arg) { + return !Arg->getSuperClasses().empty() && + llvm::StringSwitch( + Arg->getSuperClasses().back().first->getName()) + .Case("StringArgument", true) + .Default(false); +} + static bool isVariadicIdentifierArgument(const Record *Arg) { return !Arg->getSuperClasses().empty() && llvm::StringSwitch( @@ -2317,6 +2325,27 @@ OS << "#endif // CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST\n\n"; } +// Emits the list of arguments that shoulkd be parsed as unevaluated string +// literals for each attributes +static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records, + raw_ostream &OS) { + OS << "#if defined(CLANG_ATTR_STRING_LITERAL_ARG_LIST)\n"; + std::vector Attrs = Records.getAllDerivedDefinitions("Attr"); + for (const auto *Attr : Attrs) { + std::vector Args = Attr->getValueAsListOfDefs("Args"); + uint32_t Bits = 0; + for (uint32_t N = 0; N < Args.size(); N++) + Bits |= (isStringLiteralArgument(Args[N]) << N); + if (!Bits) + continue; + // All these spellings have at least one string literal has argument. + forEachUniqueSpelling(*Attr, [&](const FlattenedSpelling &S) { + OS << ".Case(\"" << S.name() << "\", " << Bits << ")\n"; + }); + } + OS << "#endif // CLANG_ATTR_STRING_LITERAL_ARG_LIST\n\n"; +} + // Emits the first-argument-is-identifier property for attributes. static void emitClangAttrIdentifierArgList(RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_IDENTIFIER_ARG_LIST)\n"; @@ -4615,6 +4644,7 @@ emitSourceFileHeader("Parser-related llvm::StringSwitch cases", OS); emitClangAttrArgContextList(Records, OS); emitClangAttrIdentifierArgList(Records, OS); + emitClangAttrUnevaluatedStringLiteralList(Records, OS); emitClangAttrVariadicIdentifierArgList(Records, OS); emitClangAttrThisIsaIdentifierArgList(Records, OS); emitClangAttrAcceptsExprPack(Records, OS); diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -605,7 +605,7 @@ # crash if LLVM is built with GCC and LTO enabled (#57740). Until # these bugs are fixed, we need to disable dead store eliminations # based on object lifetime. - add_flag_if_supported("-fno-lifetime-dse" CMAKE_CXX_FLAGS) + # add_flag_if_supported("-fno-lifetime-dse" CMAKE_CXX_FLAGS) endif ( LLVM_COMPILER_IS_GCC_COMPATIBLE ) # Modules enablement for GCC-compatible compilers: