diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -125,6 +125,14 @@
   "identifier contains Unicode character that is invisible in "
   "some environments">, InGroup>;
 
+def err_delimited_escape_incomplete : Error<
+  "incomplete escape sequence">;
+def err_delimited_escape_empty : Error<
+  "empty escape sequence">;
+def err_delimited_escape_missing_brace : Error<
+  "expected { after \\%0 escape sequence">;
+def err_delimited_escape_invalid : Error<
+  "invalid digit in escape sequence">;
 def err_hex_escape_no_digits : Error<
   "\\%0 used with no following hex digits">;
 def warn_ucn_escape_no_digits : Warning<
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -95,6 +95,9 @@
                                   DiagnosticsEngine *Diags,
                                   const LangOptions &Features) {
   const char *EscapeBegin = ThisTokBuf;
+  // True while inside a brace-delimited escape (\x{...} or \o{...}); cleared
+  // again once the closing '}' has been consumed.
+  bool Delimited = false;
 
   // Skip the '\' char.
   ++ThisTokBuf;
@@ -143,26 +146,53 @@
     break;
   case 'x': { // Hex escape.
     ResultChar = 0;
-    if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+    if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+      // Delimited form: \x{h...}.
+      Delimited = true;
+      ++ThisTokBuf;
+      // Check the bound before peeking so that "\x{" at the very end of the
+      // token does not read past ThisTokEnd.
+      if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '}') {
+        HadError = true;
+        if (Diags)
+          Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+               diag::err_delimited_escape_empty);
+        return ResultChar;
+      }
+    } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
       if (Diags)
         Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
              diag::err_hex_escape_no_digits) << "x";
       HadError = true;
       break;
     }
 
     // Hex escapes are a maximal series of hex digits.
     bool Overflow = false;
     for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
+      if (Delimited && *ThisTokBuf == '}') {
+        ++ThisTokBuf;
+        Delimited = false;
+        break;
+      }
       int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
-      if (CharVal == -1) break;
+      if (CharVal == -1) {
+        // A non-delimited hex escape simply ends at the first non-hex digit.
+        if (!Delimited)
+          break;
+        HadError = true;
+        if (Diags)
+          Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+               diag::err_delimited_escape_invalid);
+        continue;
+      }
       // About to shift out a digit?
       if (ResultChar & 0xF0000000)
         Overflow = true;
       ResultChar <<= 4;
       ResultChar |= CharVal;
     }
 
     // See if any bits will be truncated when evaluated as a character.
     if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
       Overflow = true;
@@ -200,7 +230,61 @@
     }
     break;
   }
+  case 'o': { // Delimited octal escape: \o{n...}.
+    bool Overflow = false;
+    if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
+      HadError = true;
+      if (Diags)
+        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+             diag::err_delimited_escape_missing_brace)
+            << "o";
+      break;
+    }
+    ResultChar = 0;
+    Delimited = true;
+    ++ThisTokBuf;
+    // Check the bound before peeking so that "\o{" at the very end of the
+    // token does not read past ThisTokEnd.
+    if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '}') {
+      HadError = true;
+      if (Diags)
+        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+             diag::err_delimited_escape_empty);
+      return ResultChar;
+    }
+
+    while (ThisTokBuf != ThisTokEnd) {
+      if (*ThisTokBuf == '}') {
+        Delimited = false;
+        ++ThisTokBuf;
+        break;
+      }
+      if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
+        HadError = true;
+        if (Diags)
+          Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+               diag::err_delimited_escape_invalid);
+        ++ThisTokBuf;
+        continue;
+      }
+      // About to shift out any of the top three bits?
+      if (ResultChar & 0xE0000000)
+        Overflow = true;
+
+      ResultChar <<= 3;
+      ResultChar |= *ThisTokBuf++ - '0';
+    }
+    // Check for overflow. Reject '\777', but not L'\777'.
+    if (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0)) {
+      if (Diags)
+        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+             diag::err_escape_too_large)
+            << 1;
+      ResultChar &= ~0U >> (32 - CharWidth);
+    }
+    break;
+  }
 
   // Otherwise, these are not valid escapes.
   case '(': case '{': case '[': case '%':
     // GCC accepts these as extensions. We warn about them as such though.
@@ -224,6 +308,14 @@
     break;
   }
 
+  if (Delimited) {
+    // The closing '}' of a delimited escape was never found.
+    HadError = true;
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+           diag::err_delimited_escape_incomplete);
+  }
+
   return ResultChar;
 }
 
@@ -282,25 +374,80 @@
   // Skip the '\u' char's.
   ThisTokBuf += 2;
 
-  if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+  bool Delimited = false;
+  bool EndDelimiterFound = false;
+  bool HadInvalidDigit = false;
+
+  // Only \u inside a character or string literal accepts the \u{h...} form.
+  if (UcnBegin[1] == 'u' && in_char_string_literal &&
+      ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+    Delimited = true;
+    ++ThisTokBuf;
+  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
     if (Diags)
       Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
            diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
     return false;
   }
   UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
-  unsigned short UcnLenSave = UcnLen;
-  for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
+
+  bool Overflow = false;
+  unsigned short Count = 0;
+  for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
+       ++ThisTokBuf) {
+    if (Delimited && *ThisTokBuf == '}') {
+      ++ThisTokBuf;
+      EndDelimiterFound = true;
+      break;
+    }
     int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
-    if (CharVal == -1) break;
+    if (CharVal == -1) {
+      // A non-delimited UCN stops at the first non-hex digit; the short digit
+      // count is then diagnosed below via err_ucn_escape_incomplete.
+      if (!Delimited)
+        break;
+      HadInvalidDigit = true;
+      if (Diags)
+        Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+             diag::err_delimited_escape_invalid);
+      ++Count;
+      continue;
+    }
+    if (UcnVal & 0xF0000000) {
+      Overflow = true;
+      continue;
+    }
     UcnVal <<= 4;
     UcnVal |= CharVal;
+    ++Count;
+  }
+
+  if (Overflow) {
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+           diag::err_escape_too_large)
+          << 0;
+    return false;
   }
+
+  if (Delimited && !EndDelimiterFound) {
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+           diag::err_delimited_escape_incomplete);
+    return false;
+  }
+
+  // An invalid digit has already been diagnosed; do not hand a bogus code
+  // point back to the caller.
+  if (HadInvalidDigit)
+    return false;
+
   // If we didn't consume the proper number of digits, there is a problem.
-  if (UcnLenSave) {
+  if (Count == 0 || (!Delimited && Count != UcnLen)) {
     if (Diags)
       Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
-           diag::err_ucn_escape_incomplete);
+           Delimited ? diag::err_delimited_escape_empty
+                     : diag::err_ucn_escape_incomplete);
     return false;
   }
 
diff --git a/clang/test/Lexer/char-escapes-delimited.cpp b/clang/test/Lexer/char-escapes-delimited.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/Lexer/char-escapes-delimited.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -fsyntax-only -pedantic -verify %s
+
+const char* errors =
+"\u{}"  // expected-error {{empty escape sequence}}
+"\u{"   // expected-error {{incomplete escape sequence}}
+"\u{h}" // expected-error {{invalid digit in escape sequence}}
+"\x{}"  // expected-error {{empty escape sequence}}
+"\x{"   // expected-error {{incomplete escape sequence}}
+"\x{h}" // expected-error {{invalid digit in escape sequence}}
+"\o{}"  // expected-error {{empty escape sequence}}
+"\o{"   // expected-error {{incomplete escape sequence}}
+"\o{8}" // expected-error {{invalid digit in escape sequence}}
+;
+
+void ucn () {
+  char a = '\u{1234}'; // expected-error {{character too large for enclosing character literal type}}
+  char32_t b = U'\u{1234}';
+  char32_t b2 = U'\u{1}';
+  char32_t c = U'\u{000000000001234}';
+  char32_t d = U'\u{111111111}'; // expected-error {{hex escape sequence out of range}}
+}
+
+void hex () {
+  char a = '\x{1}';
+  char b = '\x{g}'; // expected-error {{invalid digit in escape sequence}}
+  char c = '\x{ff1}'; // expected-error {{hex escape sequence out of range}}
+  char32_t d = U'\x{FFFFFFFF}';
+  char32_t e = U'\x{FFFFFFFF1}'; // expected-error {{hex escape sequence out of range}}
+  char f = '\x'; // expected-error {{\x used with no following hex digits}}
+  const char *g = "\x41g"; // OK: a non-delimited \x stops at the first non-hex digit.
+}
+
+void octal () {
+  char a = '\o{1}';
+  char b = '\o{8}'; // expected-error {{invalid digit in escape sequence}}
+  char c = '\o{777}'; // expected-error {{octal escape sequence out of range}}
+  char32_t d = U'\o{37777777777}';
+  char32_t e = U'\o{47777777777}'; // expected-error {{octal escape sequence out of range}}
+  char32_t f = U'\o{100000000000}'; // expected-error {{octal escape sequence out of range}}
+}