diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp @@ -7,9 +7,6 @@ static_assert(sizeof(a) <= 10, ""); // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when the string literal is an empty string [modernize-unary-static-assert] // CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 10 );{{$}} - static_assert(sizeof(a) <= 12, L""); - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when - // CHECK-FIXES: {{^}} static_assert(sizeof(a) <= 12 );{{$}} FOO // CHECK-FIXES: {{^}} FOO{{$}} static_assert(sizeof(a) <= 17, MSG); diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -1804,7 +1804,7 @@ /// * An array of getByteLength() char used to store the string data. public: - enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32 }; + enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32, Unevaluated }; private: unsigned numTrailingObjects(OverloadToken) const { return 1; } @@ -1866,7 +1866,7 @@ unsigned CharByteWidth); StringRef getString() const { - assert(getCharByteWidth() == 1 && + assert((isUnevaluated() || getCharByteWidth() == 1) && "This function is used in places that assume strings use char"); return StringRef(getStrDataAsChar(), getByteLength()); } @@ -1906,6 +1906,7 @@ bool isUTF8() const { return getKind() == UTF8; } bool isUTF16() const { return getKind() == UTF16; } bool isUTF32() const { return getKind() == UTF32; } + bool isUnevaluated() const { return getKind() == Unevaluated; } bool isPascal() const { return StringLiteralBits.IsPascal; } bool containsNonAscii() const { diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1408,6 +1408,7 @@ StringArgument<"Replacement", 1>]; let MeaningfulToClassTemplateDefinition = 1; let Documentation = [DeprecatedDocs]; + let ParseArgumentsAsUnevaluated = 1; } def Destructor : InheritableAttr { @@ -3044,6 +3045,7 @@ return this->getSemanticSpelling() == CXX11_nodiscard; } }]; + let ParseArgumentsAsUnevaluated = 1; } def Weak : InheritableAttr { diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -276,6 +276,13 @@ "identifier">, InGroup; def err_unsupported_string_concat : Error< "unsupported non-standard concatenation of string literals">; + +def err_unevaluated_string_prefix : Error< + "an unevaluated string literal cannot have an encoding prefix">; +def err_unevaluated_string_udl : Error< + "an unevaluated string literal cannot be a user-defined literal">; +def err_unevaluated_string_invalid_escape_sequence : Error< + "invalid escape sequence '%0' in an unevaluated string literal">; def err_string_concat_mixed_suffix : Error< "differing user-defined suffixes ('%0' and '%1') in string literal " "concatenation">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -433,9 +433,6 @@ "ISO C requires a named parameter before '...'">; def err_declarator_need_ident : Error<"declarator requires an identifier">; def err_language_linkage_spec_unknown : Error<"unknown linkage language">; -def err_language_linkage_spec_not_ascii : Error< - "string literal in language linkage specifier cannot have an " - "encoding-prefix">; def ext_use_out_of_scope_declaration : ExtWarn< "use of out-of-scope declaration of %0%select{| whose type is not " "compatible with that of an implicit declaration}1">, diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h --- a/clang/include/clang/Lex/LiteralSupport.h +++ b/clang/include/clang/Lex/LiteralSupport.h @@ -212,6 +212,11 @@ } }; +enum class StringLiteralEvalMethod { + Evaluated, + Unevaluated, +}; + /// StringLiteralParser - This decodes string escape characters and performs /// wide string analysis and Translation Phase #6 (concatenation of string /// literals) (C99 5.1.1.2p1). @@ -230,20 +235,23 @@ SmallString<32> UDSuffixBuf; unsigned UDSuffixToken; unsigned UDSuffixOffset; + StringLiteralEvalMethod EvalMethod; + public: - StringLiteralParser(ArrayRef StringToks, - Preprocessor &PP); - StringLiteralParser(ArrayRef StringToks, - const SourceManager &sm, const LangOptions &features, - const TargetInfo &target, + StringLiteralParser(ArrayRef StringToks, Preprocessor &PP, + StringLiteralEvalMethod StringMethod = + StringLiteralEvalMethod::Evaluated); + StringLiteralParser(ArrayRef StringToks, const SourceManager &sm, + const LangOptions &features, const TargetInfo &target, DiagnosticsEngine *diags = nullptr) - : SM(sm), Features(features), Target(target), Diags(diags), - MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), - ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { + : SM(sm), Features(features), Target(target), Diags(diags), + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), + ResultPtr(ResultBuf.data()), + EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false), + Pascal(false) { init(StringToks); } - bool hadError; bool Pascal; @@ -269,6 +277,9 @@ bool isUTF16() const { return Kind == tok::utf16_string_literal; } bool isUTF32() const { return Kind == tok::utf32_string_literal; } bool isPascal() const { return Pascal; } + bool isUnevaluated() const { + return EvalMethod == StringLiteralEvalMethod::Unevaluated; + } StringRef getUDSuffix() const { return UDSuffixBuf; } diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1788,8 +1788,12 @@ bool IsUnevaluated); ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false); + ExprResult ParseUnevaluatedStringLiteralExpression(); private: + ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated); + ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc); ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc); @@ -1851,7 +1855,8 @@ llvm::function_ref ExpressionStarts = llvm::function_ref(), bool FailImmediatelyOnInvalidExpr = false, - bool EarlyTypoCorrection = false); + bool EarlyTypoCorrection = false, + bool AllowEvaluatedString = true); /// ParseSimpleExpressionList - A simple comma-separated list of expressions, /// used for misc language extensions. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -5695,6 +5695,8 @@ ExprResult ActOnStringLiteral(ArrayRef StringToks, Scope *UDLScope = nullptr); + ExprResult ActOnUnevaluatedStringLiteral(ArrayRef StringToks); + /// ControllingExprOrType is either an opaque pointer coming out of a /// ParsedType or an Expr *. FIXME: it'd be better to split this interface /// into two so we don't take a void *, but that's awkward because one of diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1136,6 +1136,8 @@ case UTF32: CharByteWidth = Target.getChar32Width(); break; + case Unevaluated: + return sizeof(char); // Host; } assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple"); CharByteWidth /= 8; @@ -1149,35 +1151,45 @@ const SourceLocation *Loc, unsigned NumConcatenated) : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) { - assert(Ctx.getAsConstantArrayType(Ty) && - "StringLiteral must be of constant array type!"); - unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind); - unsigned ByteLength = Str.size(); - assert((ByteLength % CharByteWidth == 0) && - "The size of the data must be a multiple of CharByteWidth!"); - - // Avoid the expensive division. The compiler should be able to figure it - // out by itself. However as of clang 7, even with the appropriate - // llvm_unreachable added just here, it is not able to do so. - unsigned Length; - switch (CharByteWidth) { - case 1: - Length = ByteLength; - break; - case 2: - Length = ByteLength / 2; - break; - case 4: - Length = ByteLength / 4; - break; - default: - llvm_unreachable("Unsupported character width!"); - } + + unsigned Length = Str.size(); StringLiteralBits.Kind = Kind; - StringLiteralBits.CharByteWidth = CharByteWidth; - StringLiteralBits.IsPascal = Pascal; StringLiteralBits.NumConcatenated = NumConcatenated; + + if (Kind != StringKind::Unevaluated) { + assert(Ctx.getAsConstantArrayType(Ty) && + "StringLiteral must be of constant array type!"); + unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind); + unsigned ByteLength = Str.size(); + assert((ByteLength % CharByteWidth == 0) && + "The size of the data must be a multiple of CharByteWidth!"); + + // Avoid the expensive division. The compiler should be able to figure it + // out by itself. However as of clang 7, even with the appropriate + // llvm_unreachable added just here, it is not able to do so. + switch (CharByteWidth) { + case 1: + Length = ByteLength; + break; + case 2: + Length = ByteLength / 2; + break; + case 4: + Length = ByteLength / 4; + break; + default: + llvm_unreachable("Unsupported character width!"); + } + + StringLiteralBits.CharByteWidth = CharByteWidth; + StringLiteralBits.IsPascal = Pascal; + } else { + assert(!Pascal && "Can't make an unevaluated Pascal string"); + StringLiteralBits.CharByteWidth = 1; + StringLiteralBits.IsPascal = false; + } + *getTrailingObjects() = Length; // Initialize the trailing array of SourceLocation. @@ -1186,7 +1198,7 @@ NumConcatenated * sizeof(SourceLocation)); // Initialize the trailing array of char holding the string data. - std::memcpy(getTrailingObjects(), Str.data(), ByteLength); + std::memcpy(getTrailingObjects(), Str.data(), Str.size()); setDependence(ExprDependence::None); } @@ -1223,6 +1235,7 @@ void StringLiteral::outputString(raw_ostream &OS) const { switch (getKind()) { + case Unevaluated: case Ordinary: break; // no prefix. case Wide: OS << 'L'; break; @@ -1333,7 +1346,8 @@ const TargetInfo &Target, unsigned *StartToken, unsigned *StartTokenByteOffset) const { assert((getKind() == StringLiteral::Ordinary || - getKind() == StringLiteral::UTF8) && + getKind() == StringLiteral::UTF8 || + getKind() == StringLiteral::Unevaluated) && "Only narrow string literals are currently supported"); // Loop over all of the tokens in this string until we find the one that diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -87,6 +87,24 @@ MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd); } +static bool IsEscapeValidInUnevaluatedStringLiteral(char Escape) { + switch (Escape) { + case '\'': + case '"': + case '?': + case '\\': + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + return true; + } + return false; +} + /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in /// either a character or a string literal. static unsigned ProcessCharEscape(const char *ThisTokBegin, @@ -94,7 +112,8 @@ const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, - const LangOptions &Features) { + const LangOptions &Features, + StringLiteralEvalMethod EvalMethod) { const char *EscapeBegin = ThisTokBuf; bool Delimited = false; bool EndDelimiterFound = false; @@ -105,6 +124,7 @@ // We know that this character can't be off the end of the buffer, because // that would have been \", which would not have been the end of string. unsigned ResultChar = *ThisTokBuf++; + char Escape = ResultChar; switch (ResultChar) { // These map to themselves. case '\\': case '\'': case '"': case '?': break; @@ -318,6 +338,12 @@ } } + if (EvalMethod == StringLiteralEvalMethod::Unevaluated && + !IsEscapeValidInUnevaluatedStringLiteral(Escape)) { + Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, + diag::err_unevaluated_string_invalid_escape_sequence) + << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin); + } return ResultChar; } @@ -1727,9 +1753,10 @@ } unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo()); uint64_t result = - ProcessCharEscape(TokBegin, begin, end, HadError, - FullSourceLoc(Loc,PP.getSourceManager()), - CharWidth, &PP.getDiagnostics(), PP.getLangOpts()); + ProcessCharEscape(TokBegin, begin, end, HadError, + FullSourceLoc(Loc, PP.getSourceManager()), CharWidth, + &PP.getDiagnostics(), PP.getLangOpts(), + StringLiteralEvalMethod::Evaluated); *buffer_begin++ = result; } @@ -1837,13 +1864,14 @@ /// hex-digit hex-digit hex-digit hex-digit /// \endverbatim /// -StringLiteralParser:: -StringLiteralParser(ArrayRef StringToks, - Preprocessor &PP) - : SM(PP.getSourceManager()), Features(PP.getLangOpts()), - Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()), - MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), - ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { +StringLiteralParser::StringLiteralParser(ArrayRef StringToks, + Preprocessor &PP, + StringLiteralEvalMethod EvalMethod) + : SM(PP.getSourceManager()), Features(PP.getLangOpts()), + Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()), + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), + ResultPtr(ResultBuf.data()), EvalMethod(EvalMethod), hadError(false), + Pascal(false) { init(StringToks); } @@ -1860,12 +1888,12 @@ assert(!StringToks.empty() && "expected at least one token"); MaxTokenLength = StringToks[0].getLength(); assert(StringToks[0].getLength() >= 2 && "literal token is invalid!"); - SizeBound = StringToks[0].getLength()-2; // -2 for "". - Kind = StringToks[0].getKind(); - + SizeBound = StringToks[0].getLength() - 2; // -2 for "". hadError = false; - // Implement Translation Phase #6: concatenation of string literals + // Determines the kind of string from the prefix + Kind = tok::string_literal; + /// (C99 5.1.1.2p1). The common case is only one string fragment. for (const Token &Tok : StringToks) { if (Tok.getLength() < 2) @@ -1882,7 +1910,12 @@ // Remember if we see any wide or utf-8/16/32 strings. // Also check for illegal concatenations. - if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) { + if (isUnevaluated() && Tok.getKind() != tok::string_literal) { + if (Diags) + Diags->Report(Tok.getLocation(), diag::err_unevaluated_string_prefix); + hadError = true; + } + else if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) { if (isOrdinary()) { Kind = Tok.getKind(); } else { @@ -1965,13 +1998,18 @@ // result of a concatenation involving at least one user-defined-string- // literal, all the participating user-defined-string-literals shall // have the same ud-suffix. - if (UDSuffixBuf != UDSuffix) { + bool UnevaluatedStringHasUDL = isUnevaluated() && !UDSuffix.empty(); + if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) { if (Diags) { SourceLocation TokLoc = StringToks[i].getLocation(); - Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) - << UDSuffixBuf << UDSuffix - << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) - << SourceRange(TokLoc, TokLoc); + if (UnevaluatedStringHasUDL) { + Diags->Report(TokLoc, diag::err_unevaluated_string_udl) + << SourceRange(TokLoc, TokLoc); + } else { + Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc); + } } hadError = true; } @@ -2043,8 +2081,9 @@ ++ThisTokBuf; // skip " // Check if this is a pascal string - if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd && - ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { + if (!isUnevaluated() && Features.PascalStrings && + ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' && + ThisTokBuf[1] == 'p') { // If the \p sequence is found in the first token, we have a pascal string // Otherwise, if we already have a pascal string, ignore the first \p @@ -2080,9 +2119,9 @@ } // Otherwise, this is a non-UCN escape character. Process it. unsigned ResultChar = - ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, - FullSourceLoc(StringToks[i].getLocation(), SM), - CharByteWidth*8, Diags, Features); + ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, + FullSourceLoc(StringToks[i].getLocation(), SM), + CharByteWidth * 8, Diags, Features, EvalMethod); if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -2104,6 +2143,8 @@ } } + assert((!Pascal || !isUnevaluated()) && + "Pascal string in unevaluated context"); if (Pascal) { if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -2277,8 +2318,8 @@ ByteNo -= Len; } else { ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError, - FullSourceLoc(Tok.getLocation(), SM), - CharByteWidth*8, Diags, Features); + FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8, + Diags, Features, StringLiteralEvalMethod::Evaluated); --ByteNo; } assert(!HadError && "This method isn't valid on erroneous strings"); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1869,7 +1869,8 @@ if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) Tok.setKind(tok::identifier); else if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) { - StringLiteralParser Literal(Tok, *this); + StringLiteralParser Literal(Tok, *this, + StringLiteralEvalMethod::Unevaluated); if (Literal.hadError) return; diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -1088,7 +1088,8 @@ if (DiagName.is(tok::eod)) PP.getDiagnostics().dump(); else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) { - StringLiteralParser Literal(DiagName, PP); + StringLiteralParser Literal(DiagName, PP, + StringLiteralEvalMethod::Unevaluated); if (Literal.hadError) return; PP.getDiagnostics().dump(Literal.GetString()); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -463,9 +463,15 @@ : Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprVector ParsedExprs; + // If the arguments of the attributes are not evaluated, + // StringLiteral arguments are treated as unevaluated strings as it would + // not make sense to handle numerical escape sequences within them + // (non-ordinary string literals are rejected even when the arguments + // are evaluated). if (ParseExpressionList(ParsedExprs, llvm::function_ref(), /*FailImmediatelyOnInvalidExpr=*/true, - /*EarlyTypoCorrection=*/true)) { + /*EarlyTypoCorrection=*/true, + /*AllowEvaluatedString=*/!Uneval)) { SkipUntil(tok::r_paren, StopAtSemi); return 0; } diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -350,7 +350,7 @@ /// Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { assert(isTokenStringLiteral() && "Not a string literal!"); - ExprResult Lang = ParseStringLiteralExpression(false); + ExprResult Lang = ParseUnevaluatedStringLiteralExpression(); ParseScope LinkageScope(this, Scope::DeclScope); Decl *LinkageSpec = @@ -1023,7 +1023,7 @@ return nullptr; } - AssertMessage = ParseStringLiteralExpression(); + AssertMessage = ParseUnevaluatedStringLiteralExpression(); if (AssertMessage.isInvalid()) { SkipMalformedDecl(); return nullptr; diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3256,6 +3256,20 @@ /// string-literal /// \verbatim ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) { + return ParseStringLiteralExpression(AllowUserDefinedLiteral, /*Unevaluated=*/false); +} + +ExprResult Parser::ParseUnevaluatedStringLiteralExpression() { + if (!isTokenStringLiteral()) { + Diag(Tok.getLocation(), diag::err_expected_string_literal); + return ExprError(); + } + + return ParseStringLiteralExpression(/*AllowUserDefinedLiteral=*/false, /*Unevaluated=*/true); +} + +ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated) { assert(isTokenStringLiteral() && "Not a string literal!"); // String concat. Note that keywords like __func__ and __FUNCTION__ are not @@ -3267,6 +3281,11 @@ ConsumeStringToken(); } while (isTokenStringLiteral()); + if (Unevaluated) { + assert(!AllowUserDefinedLiteral && "UDL are always evaluated"); + return Actions.ActOnUnevaluatedStringLiteral(StringToks); + } + // Pass the set of string tokens, ready for concatenation, to the actions. return Actions.ActOnStringLiteral(StringToks, AllowUserDefinedLiteral ? getCurScope() @@ -3468,7 +3487,8 @@ bool Parser::ParseExpressionList(SmallVectorImpl &Exprs, llvm::function_ref ExpressionStarts, bool FailImmediatelyOnInvalidExpr, - bool EarlyTypoCorrection) { + bool EarlyTypoCorrection, + bool AllowEvaluatedString) { bool SawError = false; while (true) { if (ExpressionStarts) @@ -3478,9 +3498,11 @@ if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); Expr = ParseBraceInitializer(); - } else + } else if (!AllowEvaluatedString && tok::isStringLiteral(Tok.getKind())) { + Expr = ParseUnevaluatedStringLiteralExpression(); + } else { Expr = ParseAssignmentExpression(); - + } if (EarlyTypoCorrection) Expr = Actions.CorrectDelayedTyposInExpr(Expr); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -349,7 +349,7 @@ if (ArgLocation) *ArgLocation = E->getBeginLoc(); - if (!Literal || !Literal->isOrdinary()) { + if (!Literal || (!Literal->isUnevaluated() && !Literal->isOrdinary())) { Diag(E->getBeginLoc(), diag::err_attribute_argument_type) << CI << AANT_ArgumentString; return false; @@ -381,6 +381,16 @@ // Now check for an actual string literal. Expr *ArgExpr = AL.getArgAsExpr(ArgNum); + const auto *Literal = dyn_cast(ArgExpr->IgnoreParenCasts()); + if (ArgLocation) + *ArgLocation = ArgExpr->getBeginLoc(); + + if (!Literal || (!Literal->isUnevaluated() && !Literal->isOrdinary())) { + Diag(ArgExpr->getBeginLoc(), diag::err_attribute_argument_type) + << AL << AANT_ArgumentString; + return false; + } + Str = Literal->getString(); return checkStringLiteralArgumentAttr(AL, ArgExpr, Str, ArgLocation); } @@ -624,7 +634,7 @@ if (const auto *StrLit = dyn_cast(ArgExp)) { if (StrLit->getLength() == 0 || - (StrLit->isOrdinary() && StrLit->getString() == StringRef("*"))) { + (StrLit->isUnevaluated() && StrLit->getString() == StringRef("*"))) { // Pass empty strings to the analyzer without warnings. // Treat "*" as the universal lock. Args.push_back(ArgExp); @@ -865,7 +875,8 @@ if (!AL.checkAtLeastNumArgs(S, 1)) return false; - if (!isIntOrBool(AL.getArgAsExpr(0))) { + Expr *First = AL.getArgAsExpr(0); + if (isa(First) || !isIntOrBool(First)) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) << AL << 1 << AANT_ArgumentIntOrBool; return false; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -16467,11 +16467,7 @@ Expr *LangStr, SourceLocation LBraceLoc) { StringLiteral *Lit = cast(LangStr); - if (!Lit->isOrdinary()) { - Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii) - << LangStr->getSourceRange(); - return nullptr; - } + assert(Lit->isUnevaluated() && "Unexpected string literal kind"); StringRef Lang = Lit->getString(); LinkageSpecDecl::LanguageIDs Language; @@ -16936,10 +16932,7 @@ llvm::raw_svector_ostream Msg(MsgBuffer); if (AssertMessage) { const auto *MsgStr = cast(AssertMessage); - if (MsgStr->isOrdinary()) - Msg << MsgStr->getString(); - else - MsgStr->printPretty(Msg, nullptr, getPrintingPolicy()); + Msg << MsgStr->getString(); } Expr *InnerCond = nullptr; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1911,6 +1911,30 @@ return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } +ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef StringToks) { + StringLiteralParser Literal(StringToks, PP, + StringLiteralEvalMethod::Unevaluated); + if (Literal.hadError) + return ExprError(); + + SmallVector StringTokLocs; + for (const Token &Tok : StringToks) + StringTokLocs.push_back(Tok.getLocation()); + + StringLiteral *Lit = StringLiteral::Create( + Context, Literal.GetString(), StringLiteral::Unevaluated, false, {}, + &StringTokLocs[0], StringTokLocs.size()); + + if (!Literal.getUDSuffix().empty()) { + SourceLocation UDSuffixLoc = + getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], + Literal.getUDSuffixOffset()); + return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); + } + + return Lit; +} + /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4086,6 +4086,9 @@ case StringLiteral::Wide: return Context.typesAreCompatible(Context.getWideCharType(), QualType(ToPointeeType, 0)); + case StringLiteral::Unevaluated: + assert(false && "Unevaluated string literal in expression"); + break; } } } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -140,6 +140,9 @@ if (IsWideCharCompatible(ElemTy, Context)) return SIF_IncompatWideStringIntoWideChar; return SIF_Other; + case StringLiteral::Unevaluated: + assert(false && "Unevaluated string literal in initialization"); + break; } llvm_unreachable("missed a StringLiteral kind?"); diff --git a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp --- a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp @@ -8,7 +8,7 @@ extern "C" plusplus { } -extern u8"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern L"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern u"C++" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern U"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} +extern u8"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern L"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern u"C++" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern U"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} diff --git a/clang/test/CXX/dcl.dcl/p4-0x.cpp b/clang/test/CXX/dcl.dcl/p4-0x.cpp --- a/clang/test/CXX/dcl.dcl/p4-0x.cpp +++ b/clang/test/CXX/dcl.dcl/p4-0x.cpp @@ -18,4 +18,7 @@ static_assert(T(), ""); static_assert(U(), ""); // expected-error {{ambiguous}} -static_assert(false, L"\x14hi" "!" R"x(")x"); // expected-error {{static assertion failed: L"\024hi!\""}} +static_assert(false, L"\x14hi" // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\x14' in an unevaluated string literal}} + "!" + R"x(")x"); diff --git a/clang/test/FixIt/fixit-static-assert.cpp b/clang/test/FixIt/fixit-static-assert.cpp --- a/clang/test/FixIt/fixit-static-assert.cpp +++ b/clang/test/FixIt/fixit-static-assert.cpp @@ -11,8 +11,6 @@ // String literal prefixes are good. static_assert(true && R"(RawString)"); // CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," -static_assert(true && L"RawString"); -// CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," static_assert(true); // CHECK-DAG: {[[@LINE-1]]:19-[[@LINE-1]]:19}:", \"\"" diff --git a/clang/test/Parser/cxx0x-attributes.cpp b/clang/test/Parser/cxx0x-attributes.cpp --- a/clang/test/Parser/cxx0x-attributes.cpp +++ b/clang/test/Parser/cxx0x-attributes.cpp @@ -445,3 +445,8 @@ ) { } }; + +namespace P2361 { +[[deprecated(L"abc")]] void a(); // expected-error{{an unevaluated string literal cannot have an encoding prefix}} +[[nodiscard("\123")]] int b(); // expected-error{{invalid escape sequence '\123' in an unevaluated string literal}} +} diff --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp --- a/clang/test/SemaCXX/static-assert.cpp +++ b/clang/test/SemaCXX/static-assert.cpp @@ -29,13 +29,23 @@ S s1; // expected-note {{in instantiation of template class 'S' requested here}} S s2; -static_assert(false, L"\xFFFFFFFF"); // expected-error {{static assertion failed: L"\xFFFFFFFF"}} -static_assert(false, u"\U000317FF"); // expected-error {{static assertion failed: u"\U000317FF"}} - -static_assert(false, u8"Ω"); // expected-error {{static assertion failed: u8"\316\251"}} -static_assert(false, L"\u1234"); // expected-error {{static assertion failed: L"\x1234"}} -static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static assertion failed: L"\x1FF""0\x123""fx\xFFFFFgoop"}} - +static_assert(false, L"\xFFFFFFFF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\xFFFFFFFF' in an unevaluated string literal}} +static_assert(false, u"\U000317FF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +// FIXME: render this as u8"\u03A9" +static_assert(false, u8"Ω"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +static_assert(false, L"\u1234"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +static_assert(false, L"\x1ff" // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \ + // expected-error {{invalid escape sequence '\x1ff' in an unevaluated string literal}} + "0\x123" // expected-error {{invalid escape sequence '\x123' in an unevaluated string literal}} + "fx\xfffff" // expected-error {{invalid escape sequence '\xfffff' in an unevaluated string literal}} + "goop"); + +static_assert(false, "\'\"\?\\\a\b\f\n\r\t\v"); // expected-error {{'"?\}} +static_assert(true, "\xFF"); // expected-error {{invalid escape sequence '\xFF' in an unevaluated string literal}} +static_assert(true, "\123"); // expected-error {{invalid escape sequence '\123' in an unevaluated string literal}} +static_assert(true, "\pOh no, a Pascal string!"); // expected-warning {{unknown escape sequence '\p'}} \ + // expected-error {{invalid escape sequence '\p' in an unevaluated string literal}} static_assert(false, R"(a \tb c diff --git a/clang/test/SemaCXX/warn-thread-safety-parsing.cpp b/clang/test/SemaCXX/warn-thread-safety-parsing.cpp --- a/clang/test/SemaCXX/warn-thread-safety-parsing.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-parsing.cpp @@ -309,7 +309,7 @@ int gb_var_arg GUARDED_BY(mu1); -int gb_non_ascii GUARDED_BY(L"wide"); // expected-warning {{ignoring 'guarded_by' attribute because its argument is invalid}} +int gb_non_ascii GUARDED_BY(L"wide"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} int gb_var_args __attribute__((guarded_by(mu1, mu2))); // \ // expected-error {{'guarded_by' attribute takes one argument}}