diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -1774,7 +1774,7 @@
   /// * An array of getByteLength() char used to store the string data.
 
 public:
-  enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32 };
+  enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32, Unevaluated };
 
 private:
   unsigned numTrailingObjects(OverloadToken) const { return 1; }
@@ -1836,8 +1836,9 @@
                 unsigned CharByteWidth);
 
   StringRef getString() const {
-    assert(getCharByteWidth() == 1 &&
-           "This function is used in places that assume strings use char");
+    // The message must be and-ed with the whole condition, not just the width.
+    assert((isUnevaluated() || getCharByteWidth() == 1) &&
+           "This function is used in places that assume strings use char");
     return StringRef(getStrDataAsChar(), getByteLength());
   }
 
@@ -1876,6 +1877,7 @@
   bool isUTF8() const { return getKind() == UTF8; }
   bool isUTF16() const { return getKind() == UTF16; }
   bool isUTF32() const { return getKind() == UTF32; }
+  bool isUnevaluated() const { return getKind() == Unevaluated; }
   bool isPascal() const { return StringLiteralBits.IsPascal; }
 
   bool containsNonAscii() const {
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -241,6 +241,12 @@
   "identifier">, InGroup;
 def err_unsupported_string_concat : Error<
   "unsupported non-standard concatenation of string literals">;
+def err_unevaluated_string_prefix : Error<
+  "an unevaluated string literal cannot have an encoding prefix">;
+def err_unevaluated_string_udl : Error<
+  "an unevaluated string literal cannot be a user defined literal">;
+def err_unevaluated_string_invalid_escape_sequence : Error<
+  "Invalid escape sequence '%0' in an unevaluated string literal">;
 def err_string_concat_mixed_suffix : Error<
   "differing user-defined suffixes ('%0' and '%1') in string literal "
   "concatenation">;
diff
--git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h --- a/clang/include/clang/Lex/LiteralSupport.h +++ b/clang/include/clang/Lex/LiteralSupport.h @@ -217,14 +217,15 @@ unsigned SizeBound; unsigned CharByteWidth; tok::TokenKind Kind; + SmallString<512> UnevaluatedBuf; SmallString<512> ResultBuf; char *ResultPtr; // cursor SmallString<32> UDSuffixBuf; unsigned UDSuffixToken; unsigned UDSuffixOffset; public: - StringLiteralParser(ArrayRef StringToks, - Preprocessor &PP, bool Complain = true); + StringLiteralParser(ArrayRef StringToks, Preprocessor &PP, + bool Unevaluated = false, bool Complain = true); StringLiteralParser(ArrayRef StringToks, const SourceManager &sm, const LangOptions &features, const TargetInfo &target, @@ -238,6 +239,7 @@ bool hadError; bool Pascal; + bool Unevaluated; StringRef GetString() const { return StringRef(ResultBuf.data(), GetStringLength()); @@ -261,6 +263,7 @@ bool isUTF16() const { return Kind == tok::utf16_string_literal; } bool isUTF32() const { return Kind == tok::utf32_string_literal; } bool isPascal() const { return Pascal; } + bool isUnevaluated() const { return Unevaluated; } StringRef getUDSuffix() const { return UDSuffixBuf; } diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1750,8 +1750,12 @@ bool IsUnevaluated); ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false); + ExprResult ParseUnevaluatedStringLiteralExpression(); private: + ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated); + ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc); ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc); @@ -2649,6 +2653,8 @@ IdentifierInfo *ScopeName, SourceLocation ScopeLoc, ParsedAttr::Syntax Syntax); + ExprResult ParseAttributeArgAsUnevaluatedLiteralOrExpression(ParsedAttr::Kind Kind); + 
enum ParseAttrKindMask { PAKM_GNU = 1 << 0, PAKM_Declspec = 1 << 1,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5242,6 +5242,8 @@
   ExprResult ActOnStringLiteral(ArrayRef StringToks,
                                 Scope *UDLScope = nullptr);
 
+  ExprResult ActOnUnevaluatedStringLiteral(ArrayRef StringToks);
+
   ExprResult ActOnGenericSelectionExpr(SourceLocation KeyLoc,
                                        SourceLocation DefaultLoc,
                                        SourceLocation RParenLoc,
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1056,6 +1056,9 @@
   case UTF32:
     CharByteWidth = Target.getChar32Width();
     break;
+  case Unevaluated:
+    // Already a byte count, so return before the bits-to-bytes division below.
+    return sizeof(char); // Host
   }
   assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
   CharByteWidth /= 8;
@@ -1069,35 +1072,43 @@
                              const SourceLocation *Loc,
                              unsigned NumConcatenated)
     : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) {
-  assert(Ctx.getAsConstantArrayType(Ty) &&
-         "StringLiteral must be of constant array type!");
-  unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
+
   unsigned ByteLength = Str.size();
-  assert((ByteLength % CharByteWidth == 0) &&
-         "The size of the data must be a multiple of CharByteWidth!");
-
-  // Avoid the expensive division. The compiler should be able to figure it
-  // out by itself. However as of clang 7, even with the appropriate
-  // llvm_unreachable added just here, it is not able to do so.
-  unsigned Length;
-  switch (CharByteWidth) {
-  case 1:
-    Length = ByteLength;
-    break;
-  case 2:
-    Length = ByteLength / 2;
-    break;
-  case 4:
-    Length = ByteLength / 4;
-    break;
-  default:
-    llvm_unreachable("Unsupported character width!");
-  }
+  unsigned Length = ByteLength;
 
   StringLiteralBits.Kind = Kind;
-  StringLiteralBits.CharByteWidth = CharByteWidth;
+  // Width used for unevaluated literals; evaluated kinds overwrite it below.
+  StringLiteralBits.CharByteWidth = 1;
   StringLiteralBits.IsPascal = Pascal;
   StringLiteralBits.NumConcatenated = NumConcatenated;
+
+  if (Kind != StringKind::Unevaluated) {
+    assert(Ctx.getAsConstantArrayType(Ty) &&
+           "StringLiteral must be of constant array type!");
+    unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
+    assert((ByteLength % CharByteWidth == 0) &&
+           "The size of the data must be a multiple of CharByteWidth!");
+
+    // Avoid the expensive division. The compiler should be able to figure it
+    // out by itself. However as of clang 7, even with the appropriate
+    // llvm_unreachable added just here, it is not able to do so.
+    switch (CharByteWidth) {
+    case 1:
+      Length = ByteLength;
+      break;
+    case 2:
+      Length = ByteLength / 2;
+      break;
+    case 4:
+      Length = ByteLength / 4;
+      break;
+    default:
+      llvm_unreachable("Unsupported character width!");
+    }
+
+    StringLiteralBits.CharByteWidth = CharByteWidth;
+  }
+
   *getTrailingObjects() = Length;
 
   // Initialize the trailing array of SourceLocation.
@@ -1143,6 +1154,8 @@
 void StringLiteral::outputString(raw_ostream &OS) const {
   switch (getKind()) {
+  case Unevaluated:
+    break; // no prefix.
   case Ascii: break; // no prefix.
case Wide: OS << 'L'; break; case UTF8: OS << "u8"; break; @@ -1259,7 +1272,8 @@ const TargetInfo &Target, unsigned *StartToken, unsigned *StartTokenByteOffset) const { assert((getKind() == StringLiteral::Ascii || - getKind() == StringLiteral::UTF8) && + getKind() == StringLiteral::UTF8 || + getKind() == StringLiteral::Unevaluated) && "Only narrow string literals are currently supported"); // Loop over all of the tokens in this string until we find the one that diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -265,7 +265,7 @@ if (T.isAtStartOfLine() || T.getKind() != tok::string_literal) return SourceLocation(); - StringLiteralParser Literal(T, CI.getPreprocessor()); + StringLiteralParser Literal(T, CI.getPreprocessor(), /*Unevaluated*/ true); if (Literal.hadError) return SourceLocation(); RawLexer->LexFromRawLexer(T); diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -86,14 +86,26 @@ MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd); } +static bool EscapeValidInUnevaluatedStringLiteral(char Escape) { + switch (Escape) { + case '\\': + case '\'': + case '"': + case '?': + case 'n': + case 't': + return true; + } + return false; +} + /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in /// either a character or a string literal. 
-static unsigned ProcessCharEscape(const char *ThisTokBegin,
-                                  const char *&ThisTokBuf,
-                                  const char *ThisTokEnd, bool &HadError,
-                                  FullSourceLoc Loc, unsigned CharWidth,
-                                  DiagnosticsEngine *Diags,
-                                  const LangOptions &Features) {
+static unsigned
+ProcessCharEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+                  const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc,
+                  unsigned CharWidth, DiagnosticsEngine *Diags,
+                  const LangOptions &Features, bool Unevaluated) {
   const char *EscapeBegin = ThisTokBuf;
 
   // Skip the '\' char.
@@ -224,6 +236,15 @@
     break;
   }
 
+  // EscapeBegin points at the backslash, so the character that selects the
+  // escape is the one right after it.
+  if (Unevaluated && !EscapeValidInUnevaluatedStringLiteral(EscapeBegin[1])) {
+    HadError = true;
+    // Diags is null when the caller asked not to complain.
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+           diag::err_unevaluated_string_invalid_escape_sequence)
+          << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin);
+  }
   return ResultChar;
 }
 
@@ -1380,10 +1397,10 @@
         continue;
       }
       unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
-      uint64_t result =
-        ProcessCharEscape(TokBegin, begin, end, HadError,
-                          FullSourceLoc(Loc,PP.getSourceManager()),
-                          CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
+      uint64_t result = ProcessCharEscape(
+          TokBegin, begin, end, HadError,
+          FullSourceLoc(Loc, PP.getSourceManager()), CharWidth,
+          &PP.getDiagnostics(), PP.getLangOpts(), /*Unevaluated*/ false);
       *buffer_begin++ = result;
     }
 
@@ -1491,13 +1508,15 @@
 ///         hex-digit hex-digit hex-digit hex-digit
 /// \endverbatim
 ///
-StringLiteralParser::
-StringLiteralParser(ArrayRef StringToks,
-                    Preprocessor &PP, bool Complain)
-  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
-    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
-    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
-    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
+StringLiteralParser::StringLiteralParser(ArrayRef StringToks,
+                                         Preprocessor &PP, bool Unevaluated,
+                                         bool Complain)
+    : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
+      Target(PP.getTargetInfo()),
+      Diags(Complain ?
&PP.getDiagnostics() : nullptr), MaxTokenLength(0),
+      SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false),
+      Unevaluated(Unevaluated) {
   init(StringToks);
 }
 
@@ -1515,11 +1534,30 @@
   MaxTokenLength = StringToks[0].getLength();
   assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
   SizeBound = StringToks[0].getLength()-2; // -2 for "".
-  Kind = StringToks[0].getKind();
-
   hadError = false;
 
-  // Implement Translation Phase #6: concatenation of string literals
+  // Determines the kind of string from the prefix
+  Kind = tok::string_literal;
+  for (const auto &Tok : StringToks) {
+    // Unevaluated string literals can never have a prefix
+    if (Unevaluated && Tok.getKind() != tok::string_literal) {
+      if (Diags)
+        Diags->Report(Tok.getLocation(), diag::err_unevaluated_string_prefix);
+      hadError = true;
+      continue;
+    }
+    if (Tok.is(tok::string_literal))
+      continue;
+    if (Tok.is(Kind) || Kind == tok::string_literal) {
+      Kind = Tok.getKind();
+      continue;
+    }
+    // Mixed encoding prefixes: an error even when diagnostics are suppressed.
+    if (Diags)
+      Diags->Report(Tok.getLocation(), diag::err_unsupported_string_concat);
+    hadError = true;
+  }
+
   /// (C99 5.1.1.2p1). The common case is only one string fragment.
   for (unsigned i = 1; i != StringToks.size(); ++i) {
     if (StringToks[i].getLength() < 2)
@@ -1533,19 +1571,6 @@
     // Remember maximum string piece length.
     if (StringToks[i].getLength() > MaxTokenLength)
       MaxTokenLength = StringToks[i].getLength();
-
-    // Remember if we see any wide or utf-8/16/32 strings.
-    // Also check for illegal concatenations.
-    if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
-      if (isAscii()) {
-        Kind = StringToks[i].getKind();
-      } else {
-        if (Diags)
-          Diags->Report(StringToks[i].getLocation(),
-                        diag::err_unsupported_string_concat);
-        hadError = true;
-      }
-    }
   }
 
   // Include space for the null terminator.
@@ -1620,13 +1645,16 @@ // result of a concatenation involving at least one user-defined-string- // literal, all the participating user-defined-string-literals shall // have the same ud-suffix. - if (UDSuffixBuf != UDSuffix) { + const bool UnevaluatedStringHasUDL = Unevaluated && !UDSuffix.empty(); + if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) { if (Diags) { SourceLocation TokLoc = StringToks[i].getLocation(); - Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) - << UDSuffixBuf << UDSuffix - << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) - << SourceRange(TokLoc, TokLoc); + Diags->Report(TokLoc, UnevaluatedStringHasUDL + ? diag::err_unevaluated_string_udl + : diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) + << SourceRange(TokLoc, TokLoc); } hadError = true; } @@ -1697,8 +1725,9 @@ ++ThisTokBuf; // skip " // Check if this is a pascal string - if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd && - ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') { + if (!Unevaluated && Features.PascalStrings && + ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' && + ThisTokBuf[1] == 'p') { // If the \p sequence is found in the first token, we have a pascal string // Otherwise, if we already have a pascal string, ignore the first \p @@ -1733,9 +1762,9 @@ } // Otherwise, this is a non-UCN escape character. Process it. 
unsigned ResultChar = - ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, - FullSourceLoc(StringToks[i].getLocation(), SM), - CharByteWidth*8, Diags, Features); + ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError, + FullSourceLoc(StringToks[i].getLocation(), SM), + CharByteWidth * 8, Diags, Features, Unevaluated); if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -1757,6 +1786,7 @@ } } + assert((!Pascal || !Unevaluated) && "Pascal string in unevaluated context"); if (Pascal) { if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of @@ -1929,8 +1959,8 @@ ByteNo -= Len; } else { ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError, - FullSourceLoc(Tok.getLocation(), SM), - CharByteWidth*8, Diags, Features); + FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8, + Diags, Features, false); --ByteNo; } assert(!HadError && "This method isn't valid on erroneous strings"); diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1280,17 +1280,11 @@ return; } else { // Parse and validate the string, converting it into a unique ID. - StringLiteralParser Literal(StrTok, *this); - assert(Literal.isAscii() && "Didn't allow wide strings in"); + StringLiteralParser Literal(StrTok, *this, /*Unevaluated*/ true); if (Literal.hadError) { DiscardUntilEndOfDirective(); return; } - if (Literal.Pascal) { - Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - DiscardUntilEndOfDirective(); - return; - } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); // Verify that there is nothing after the string, other than EOD. Because @@ -1430,17 +1424,11 @@ return; } else { // Parse and validate the string, converting it into a unique ID. 
- StringLiteralParser Literal(StrTok, *this); - assert(Literal.isAscii() && "Didn't allow wide strings in"); + StringLiteralParser Literal(StrTok, *this, /*Unevaluated*/ true); if (Literal.hadError) { DiscardUntilEndOfDirective(); return; } - if (Literal.Pascal) { - Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - DiscardUntilEndOfDirective(); - return; - } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); // If a filename was present, read any flags that are present. diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1815,7 +1815,7 @@ if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) Tok.setKind(tok::identifier); else if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) { - StringLiteralParser Literal(Tok, *this); + StringLiteralParser Literal(Tok, *this, /*Unevaluated*/ true); if (Literal.hadError) return; diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -1082,7 +1082,7 @@ if (DiagName.is(tok::eod)) PP.getDiagnostics().dump(); else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) { - StringLiteralParser Literal(DiagName, PP); + StringLiteralParser Literal(DiagName, PP, /*Unevaluated*/ true); if (Literal.hadError) return; PP.getDiagnostics().dump(Literal.GetString()); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -366,13 +366,15 @@ if (ChangeKWThisToIdent && Tok.is(tok::kw_this)) Tok.setKind(tok::identifier); + ParsedAttr::Kind AttrKind = + ParsedAttr::getParsedKind(AttrName, ScopeName, Syntax); + + ArgsVector ArgExprs; if (Tok.is(tok::identifier)) { // If this attribute wants an 'identifier' argument, make it so. 
bool IsIdentifierArg = attributeHasIdentifierArg(*AttrName) ||
                            attributeHasVariadicIdentifierArg(*AttrName);
-    ParsedAttr::Kind AttrKind =
-        ParsedAttr::getParsedKind(AttrName, ScopeName, Syntax);
 
     // If we don't know how to parse this attribute, but this is the only
     // token in this argument, assume it's meant to be an identifier.
@@ -419,7 +421,7 @@
                 : Sema::ExpressionEvaluationContext::ConstantEvaluated);
 
       ExprResult ArgExpr(
-          Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()));
+          Actions.CorrectDelayedTyposInExpr(ParseAttributeArgAsUnevaluatedLiteralOrExpression(AttrKind)));
       if (ArgExpr.isInvalid()) {
         SkipUntil(tok::r_paren, StopAtSemi);
         return 0;
@@ -449,6 +451,23 @@
   return static_cast(ArgExprs.size() + !TheParsedType.get().isNull());
 }
 
+/// Parse one attribute argument.
+///
+/// For AT_Deprecated and AT_WarnUnusedResult, an argument that starts with a
+/// string literal is parsed as an unevaluated string literal; every other
+/// argument is parsed as an assignment-expression.
+ExprResult Parser::ParseAttributeArgAsUnevaluatedLiteralOrExpression(
+    ParsedAttr::Kind Kind) {
+  const bool WantsUnevaluatedString = Kind == ParsedAttr::AT_Deprecated ||
+                                      Kind == ParsedAttr::AT_WarnUnusedResult;
+  // Parsing the literal consumes its tokens even when it is rejected, so the
+  // result is returned as-is: falling back to ParseAssignmentExpression()
+  // would parse whatever follows the string instead of the argument.
+  if (WantsUnevaluatedString && isTokenStringLiteral())
+    return ParseUnevaluatedStringLiteralExpression();
+  return ParseAssignmentExpression();
+}
+
 /// Parse the arguments to a parameterized GNU attribute or
 /// a C++11 attribute in "gnu" namespace.
 void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName,
@@ -1137,20 +1149,9 @@
         return;
       }
       if (Keyword == Ident_message)
-        MessageExpr = ParseStringLiteralExpression();
+        MessageExpr = ParseUnevaluatedStringLiteralExpression();
       else
-        ReplacementExpr = ParseStringLiteralExpression();
-      // Also reject wide string literals.
- if (StringLiteral *MessageStringLiteral = - cast_or_null(MessageExpr.get())) { - if (!MessageStringLiteral->isAscii()) { - Diag(MessageStringLiteral->getSourceRange().getBegin(), - diag::err_expected_string_literal) - << /*Source='availability attribute'*/ 2; - SkipUntil(tok::r_paren, StopAtSemi); - return; - } - } + ReplacementExpr = ParseUnevaluatedStringLiteralExpression(); if (Keyword == Ident_message) break; else @@ -1329,19 +1330,19 @@ if (HadLanguage) { Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) << Keyword; - ParseStringLiteralExpression(); + ParseUnevaluatedStringLiteralExpression(); continue; } - Language = ParseStringLiteralExpression(); + Language = ParseUnevaluatedStringLiteralExpression(); } else { assert(Keyword == Ident_defined_in && "Invalid clause keyword!"); if (HadDefinedIn) { Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) << Keyword; - ParseStringLiteralExpression(); + ParseUnevaluatedStringLiteralExpression(); continue; } - DefinedInExpr = ParseStringLiteralExpression(); + DefinedInExpr = ParseUnevaluatedStringLiteralExpression(); } } while (TryConsumeToken(tok::comma)); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -334,7 +334,7 @@ /// Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) { assert(isTokenStringLiteral() && "Not a string literal!"); - ExprResult Lang = ParseStringLiteralExpression(false); + ExprResult Lang = ParseUnevaluatedStringLiteralExpression(); ParseScope LinkageScope(this, Scope::DeclScope); Decl *LinkageSpec = @@ -967,7 +967,7 @@ return nullptr; } - AssertMessage = ParseStringLiteralExpression(); + AssertMessage = ParseUnevaluatedStringLiteralExpression(); if (AssertMessage.isInvalid()) { SkipMalformedDecl(); return nullptr; @@ -4510,7 +4510,7 @@ Toks[0].setLiteralData(StrBuffer.data()); Toks[0].setLength(StrBuffer.size()); StringLiteral 
*UuidString = - cast(Actions.ActOnStringLiteral(Toks, nullptr).get()); + cast(Actions.ActOnStringLiteral(Toks).get()); ArgExprs.push_back(UuidString); } diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3164,6 +3164,15 @@ /// string-literal /// \verbatim ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) { + return ParseStringLiteralExpression(AllowUserDefinedLiteral, false); +} + +ExprResult Parser::ParseUnevaluatedStringLiteralExpression() { + return ParseStringLiteralExpression(false, true); +} + +ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral, + bool Unevaluated) { assert(isTokenStringLiteral() && "Not a string literal!"); // String concat. Note that keywords like __func__ and __FUNCTION__ are not @@ -3175,6 +3184,11 @@ ConsumeStringToken(); } while (isTokenStringLiteral()); + if (Unevaluated) { + assert(!AllowUserDefinedLiteral && "UDL are always evaluated"); + return Actions.ActOnUnevaluatedStringLiteral(StringToks); + } + // Pass the set of string tokens, ready for concatenation, to the actions. return Actions.ActOnStringLiteral(StringToks, AllowUserDefinedLiteral ? 
getCurScope()
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -1527,15 +1527,9 @@
     return ExprError();
   }
 
-  ExprResult AsmString(ParseStringLiteralExpression());
+  ExprResult AsmString(ParseUnevaluatedStringLiteralExpression());
   if (!AsmString.isInvalid()) {
     const auto *SL = cast(AsmString.get());
-    if (!SL->isAscii()) {
-      Diag(Tok, diag::err_asm_operand_wide_string_literal)
-        << SL->isWide()
-        << SL->getSourceRange();
-      return ExprError();
-    }
     if (ForAsmLabel && SL->getString().empty()) {
       Diag(Tok, diag::err_asm_operand_wide_string_literal)
           << 2 /* an empty */ << SL->getSourceRange();
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -359,7 +359,9 @@
   if (ArgLocation)
     *ArgLocation = ArgExpr->getBeginLoc();
 
-  if (!Literal || !Literal->isAscii()) {
+  // TODO all StringLiteral here should be unevaluated
+
+  if (!Literal || (!Literal->isUnevaluated() && !Literal->isAscii())) {
     Diag(ArgExpr->getBeginLoc(), diag::err_attribute_argument_type)
         << AL << AANT_ArgumentString;
     return false;
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -16043,11 +16043,7 @@
                                            Expr *LangStr,
                                            SourceLocation LBraceLoc) {
   StringLiteral *Lit = cast(LangStr);
-  if (!Lit->isAscii()) {
-    Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii)
-      << LangStr->getSourceRange();
-    return nullptr;
-  }
+  assert(Lit->isUnevaluated() && "Unexpected string literal kind");
 
   StringRef Lang = Lit->getString();
   LinkageSpecDecl::LanguageIDs Language;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1791,6 +1791,33 @@
   return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc);
 }
 
+/// Build a StringLiteral of kind Unevaluated from the given string tokens.
+/// Returns ExprError() if lexing the tokens failed or if the literal carries a
+/// user-defined suffix.
+ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef StringToks) {
+  StringLiteralParser Literal(StringToks, PP, /*Unevaluated=*/true);
+  if (Literal.hadError)
+    return ExprError();
+
+  SmallVector StringTokLocs;
+  for (const Token &Tok : StringToks)
+    StringTokLocs.push_back(Tok.getLocation());
+
+  // Reject user-defined suffixes before creating the AST node, so no literal is
+  // allocated only to be discarded.
+  if (!Literal.getUDSuffix().empty()) {
+    SourceLocation UDSuffixLoc =
+        getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()],
+                       Literal.getUDSuffixOffset());
+    return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl));
+  }
+
+  return StringLiteral::Create(Context, Literal.GetString(),
+                               StringLiteral::Unevaluated, /*Pascal=*/false,
+                               /*Ty=*/{}, StringTokLocs.data(),
+                               StringTokLocs.size());
+}
+
 /// ActOnStringLiteral - The specified tokens were lexed as pasted string
 /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string
 /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -3960,6 +3960,9 @@
           case StringLiteral::Wide:
             return Context.typesAreCompatible(Context.getWideCharType(),
                                               QualType(ToPointeeType, 0));
+          case StringLiteral::Unevaluated:
+            assert(false && "Unevaluated string literal in expression");
+            break;
           }
         }
       }
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -129,6 +129,9 @@
     if (IsWideCharCompatible(ElemTy, Context))
       return SIF_IncompatWideStringIntoWideChar;
     return SIF_Other;
+  case StringLiteral::Unevaluated:
+    assert(false && "Unevaluated string literal in initialization");
+    break;
   }
 
   llvm_unreachable("missed a StringLiteral kind?");
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -254,7 +254,7 @@
   SmallVector OutputConstraintInfos;
 
   // The parser verifies that
there is a string literal here. - assert(AsmString->isAscii()); + assert(AsmString->isUnevaluated()); FunctionDecl *FD = dyn_cast(getCurLexicalContext()); llvm::StringMap FeatureMap; @@ -262,7 +262,7 @@ for (unsigned i = 0; i != NumOutputs; i++) { StringLiteral *Literal = Constraints[i]; - assert(Literal->isAscii()); + assert(Literal->isUnevaluated()); StringRef OutputName; if (Names[i]) @@ -353,7 +353,7 @@ for (unsigned i = NumOutputs, e = NumOutputs + NumInputs; i != e; i++) { StringLiteral *Literal = Constraints[i]; - assert(Literal->isAscii()); + assert(Literal->isUnevaluated()); StringRef InputName; if (Names[i]) @@ -458,7 +458,7 @@ // Check that the clobbers are valid. for (unsigned i = 0; i != NumClobbers; i++) { StringLiteral *Literal = Clobbers[i]; - assert(Literal->isAscii()); + assert(Literal->isUnevaluated()); StringRef Clobber = Literal->getString(); diff --git a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp --- a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp @@ -8,7 +8,7 @@ extern "C" plusplus { } -extern u8"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern L"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern u"C++" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} -extern U"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}} +extern u8"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern L"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern u"C++" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +extern U"C" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}} diff --git 
a/clang/test/CXX/dcl.dcl/p4-0x.cpp b/clang/test/CXX/dcl.dcl/p4-0x.cpp --- a/clang/test/CXX/dcl.dcl/p4-0x.cpp +++ b/clang/test/CXX/dcl.dcl/p4-0x.cpp @@ -18,4 +18,4 @@ static_assert(T(), ""); static_assert(U(), ""); // expected-error {{ambiguous}} -static_assert(false, L"\x14hi" "!" R"x(")x"); // expected-error {{static_assert failed L"\024hi!\""}} +static_assert(false, L"\x14hi" "!" R"x(")x"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} diff --git a/clang/test/FixIt/fixit-static-assert.cpp b/clang/test/FixIt/fixit-static-assert.cpp --- a/clang/test/FixIt/fixit-static-assert.cpp +++ b/clang/test/FixIt/fixit-static-assert.cpp @@ -11,8 +11,6 @@ // String literal prefixes are good. static_assert(true && R"(RawString)"); // CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," -static_assert(true && L"RawString"); -// CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:"," static_assert(true); // CHECK-DAG: {[[@LINE-1]]:19-[[@LINE-1]]:19}:", \"\"" diff --git a/clang/test/Parser/asm.c b/clang/test/Parser/asm.c --- a/clang/test/Parser/asm.c +++ b/clang/test/Parser/asm.c @@ -11,7 +11,9 @@ void f2() { asm("foo" : "=r" (a)); // expected-error {{use of undeclared identifier 'a'}} - asm("foo" : : "r" (b)); // expected-error {{use of undeclared identifier 'b'}} + asm("foo" + : + : "r"(b)); // expected-error {{use of undeclared identifier 'b'}} } void a() __asm__(""); // expected-error {{cannot use an empty string literal in 'asm'}} @@ -23,7 +25,7 @@ __asm ; // expected-error {{expected '(' after 'asm'}} // - Don't crash on wide string literals in 'asm'. 
-int foo asm (L"bar"); // expected-error {{cannot use wide string literal in 'asm'}} +int foo asm(L"bar"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} asm() // expected-error {{expected string literal in 'asm'}} // expected-error@-1 {{expected ';' after top-level asm block}} diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp --- a/clang/test/Parser/asm.cpp +++ b/clang/test/Parser/asm.cpp @@ -1,9 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s int foo1 asm ("bar1"); -int foo2 asm (L"bar2"); // expected-error {{cannot use wide string literal in 'asm'}} -int foo3 asm (u8"bar3"); // expected-error {{cannot use unicode string literal in 'asm'}} -int foo4 asm (u"bar4"); // expected-error {{cannot use unicode string literal in 'asm'}} -int foo5 asm (U"bar5"); // expected-error {{cannot use unicode string literal in 'asm'}} +int foo2 asm(L"bar2"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +int foo3 asm(u8"bar3"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +int foo4 asm(u"bar4"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +int foo5 asm(U"bar5"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} int foo6 asm ("bar6"_x); // expected-error {{string literal with user-defined suffix cannot be used here}} -int foo6 asm ("" L"bar7"); // expected-error {{cannot use wide string literal in 'asm'}} +int foo6 asm("" + L"bar7"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} diff --git a/clang/test/Parser/attr-availability.c b/clang/test/Parser/attr-availability.c --- a/clang/test/Parser/attr-availability.c +++ b/clang/test/Parser/attr-availability.c @@ -20,15 +20,18 @@ void f7() __attribute__((availability(macosx,message=L"wide"))); // expected-error {{expected string literal for optional message in 'availability' attribute}} 
-void f8() __attribute__((availability(macosx,message="a" L"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}} +void f8() __attribute__((availability(macosx, message = "a" + L"b"))); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} void f9() __attribute__((availability(macosx,message=u8"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}} -void f10() __attribute__((availability(macosx,message="a" u8"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}} +void f10() __attribute__((availability(macosx, message = "a" + u8"b"))); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} void f11() __attribute__((availability(macosx,message=u"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}} -void f12() __attribute__((availability(macosx,message="a" u"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}} +void f12() __attribute__((availability(macosx, message = "a" + u"b"))); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} // rdar://10095131 enum E{ diff --git a/clang/test/Sema/asm.c b/clang/test/Sema/asm.c --- a/clang/test/Sema/asm.c +++ b/clang/test/Sema/asm.c @@ -37,14 +37,17 @@ asm ("nop" : "=c" (a) : "r" (no_clobber_conflict) : "%rcx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}} asm ("nop" : "=r" (no_clobber_conflict) : "c" (c) : "%rcx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}} asm ("nop" : "=r" (clobber_conflict) : "c" (c) : "%rcx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}} - asm ("nop" : "=a" (a) : "b" (b) : "%rcx", "%rbx"); // expected-error {{asm-specifier for 
input or output variable conflicts with asm clobber list}} + asm("nop" + : "=a"(a) + : "b"(b) + : "%rcx", "%rbx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}} } // rdar://6094010 void test3() { int x; - asm(L"foo" : "=r"(x)); // expected-error {{wide string}} - asm("foo" : L"=r"(x)); // expected-error {{wide string}} + asm(L"foo" : "=r"(x)); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} + asm("foo" : L"=r"(x)); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} } // diff --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp --- a/clang/test/SemaCXX/static-assert.cpp +++ b/clang/test/SemaCXX/static-assert.cpp @@ -28,12 +28,12 @@ S s1; // expected-note {{in instantiation of template class 'S' requested here}} S s2; -static_assert(false, L"\xFFFFFFFF"); // expected-error {{static_assert failed L"\xFFFFFFFF"}} -static_assert(false, u"\U000317FF"); // expected-error {{static_assert failed u"\U000317FF"}} +static_assert(false, L"\xFFFFFFFF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} expected-error {{hex escape sequence out of range}} +static_assert(false, u"\U000317FF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} // FIXME: render this as u8"\u03A9" -static_assert(false, u8"Ω"); // expected-error {{static_assert failed u8"\316\251"}} -static_assert(false, L"\u1234"); // expected-error {{static_assert failed L"\x1234"}} -static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static_assert failed L"\x1FF""0\x123""fx\xFFFFFgoop"}} +static_assert(false, u8"Ω"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +static_assert(false, L"\u1234"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} +static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // 
expected-error {{an unevaluated string literal cannot have an encoding prefix}} expected-error 3{{hex escape sequence out of range}} template struct AlwaysFails { // Only give one error here.