diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -143,8 +143,8 @@ /// True if this is the first time we're lexing the input file. bool IsFirstTimeLexingFile; - // NewLineOffset - A offset to new line character '\n' being lexed. For '\r\n', - // it also points to '\n.' + // NewLineOffset - An offset to new line character '\n' being lexed. For + // '\r\n', it also points to '\n'. Optional NewLineOffset; // CurrentConflictMarkerState - The kind of conflict marker we are handling. @@ -157,7 +157,8 @@ /// next token to use from the current dependency directive. unsigned NextDepDirectiveTokenIndex = 0; - void InitLexer(const char *BufStart, unsigned BufferOffset, unsigned BufferSize); + void InitLexer(const char *BufStart, unsigned BufferOffset, + unsigned BufferSize); public: /// Lexer constructor - Create a new lexer object for the specified buffer @@ -281,9 +282,7 @@ void resetExtendedTokenMode(); /// Gets source code buffer. - StringRef getBuffer() const { - return StringRef(BufferStart, BufferSize); - } + StringRef getBuffer() const { return StringRef(BufferStart, BufferSize); } /// ReadToEndOfLine - Read the rest of the current preprocessor line as an /// uninterpreted string. This switches the lexer out of directive mode. @@ -305,15 +304,13 @@ } /// Return the current location in the buffer. - const char *getBufferLocation() const { + const char *getBufferLocation() const { assert(BufferOffset <= BufferSize && "Invalid buffer state"); return BufferStart + BufferOffset; } /// Returns the current lexing offset. - unsigned getCurrentBufferOffset() { - return BufferOffset; - } + unsigned getCurrentBufferOffset() { return BufferOffset; } /// Set the lexer's buffer pointer to \p Offset. void seek(unsigned Offset, bool IsAtStartOfLine); @@ -618,9 +615,8 @@ /// takes that range and assigns it to the token as its location and size. 
In /// addition, since tokens cannot overlap, this also updates BufferPtr to be /// TokEnd. - void FormTokenWithChars(Token &Result, unsigned TokEnd, - tok::TokenKind Kind) { - unsigned TokLen = TokEnd-BufferOffset; + void FormTokenWithChars(Token &Result, unsigned TokEnd, tok::TokenKind Kind) { + unsigned TokLen = TokEnd - BufferOffset; Result.setLength(TokLen); Result.setLocation(getSourceLocation(BufferOffset, TokLen)); Result.setKind(Kind); @@ -665,7 +661,8 @@ inline char getAndAdvanceChar(unsigned &Offset, Token &Tok) { // If this is not a trigraph and not a UCN or escaped newline, return // quickly. - if (isObviouslySimpleCharacter(BufferStart[Offset])) return BufferStart[Offset++]; + if (isObviouslySimpleCharacter(BufferStart[Offset])) + return BufferStart[Offset++]; unsigned Size = 0; char C = getCharAndSizeSlow(Offset, Size, &Tok); @@ -680,13 +677,13 @@ unsigned ConsumeChar(unsigned Offset, unsigned Size, Token &Tok) { // Normal case, we consumed exactly one token. Just return it. if (Size == 1) - return Offset+Size; + return Offset + Size; // Otherwise, re-lex the character with a current token, allowing // diagnostics to be emitted and flags to be set. Size = 0; getCharAndSizeSlow(Offset, Size, &Tok); - return Offset+Size; + return Offset + Size; } /// getCharAndSize - Peek a single 'character' from the specified buffer, @@ -732,8 +729,7 @@ void PropagateLineStartLeadingSpaceInfo(Token &Result); - unsigned LexUDSuffix(Token &Result, unsigned CurOffset, - bool IsStringLiteral); + unsigned LexUDSuffix(Token &Result, unsigned CurOffset, bool IsStringLiteral); // Helper functions to lex the remainder of a token of the specific type. @@ -741,22 +737,20 @@ // the first codepoint of the identifyier has been parsed. 
bool LexIdentifierContinue(Token &Result, unsigned CurOffset); - bool LexNumericConstant (Token &Result, unsigned CurOffset); - bool LexStringLiteral (Token &Result, unsigned CurOffset, - tok::TokenKind Kind); - bool LexRawStringLiteral (Token &Result, unsigned CurOffset, - tok::TokenKind Kind); + bool LexNumericConstant(Token &Result, unsigned CurOffset); + bool LexStringLiteral(Token &Result, unsigned CurOffset, tok::TokenKind Kind); + bool LexRawStringLiteral(Token &Result, unsigned CurOffset, + tok::TokenKind Kind); bool LexAngledStringLiteral(Token &Result, unsigned CurOffset); - bool LexCharConstant (Token &Result, unsigned CurOffset, - tok::TokenKind Kind); - bool LexEndOfFile (Token &Result, unsigned CurOffset); - bool SkipWhitespace (Token &Result, unsigned CurOffset, - bool &TokAtPhysicalStartOfLine); - bool SkipLineComment (Token &Result, unsigned CurOffset, - bool &TokAtPhysicalStartOfLine); - bool SkipBlockComment (Token &Result, unsigned CurOffset, - bool &TokAtPhysicalStartOfLine); - bool SaveLineComment (Token &Result, unsigned CurOffset); + bool LexCharConstant(Token &Result, unsigned CurOffset, tok::TokenKind Kind); + bool LexEndOfFile(Token &Result, unsigned CurOffset); + bool SkipWhitespace(Token &Result, unsigned CurOffset, + bool &TokAtPhysicalStartOfLine); + bool SkipLineComment(Token &Result, unsigned CurOffset, + bool &TokAtPhysicalStartOfLine); + bool SkipBlockComment(Token &Result, unsigned CurOffset, + bool &TokAtPhysicalStartOfLine); + bool SaveLineComment(Token &Result, unsigned CurOffset); bool IsStartOfConflictMarker(unsigned CurOffset); bool HandleEndOfConflictMarker(unsigned CurOffset); @@ -768,13 +762,13 @@ bool isHexaLiteral(unsigned Start, const LangOptions &LangOpts); - void codeCompleteIncludedFile(unsigned PathStart, - unsigned CompletionPoint, bool IsAngled); + void codeCompleteIncludedFile(unsigned PathStart, unsigned CompletionPoint, + bool IsAngled); - std::optional - tryReadNumericUCN(unsigned &StartOffset, unsigned 
SlashLoc, Token *Result); + std::optional tryReadNumericUCN(unsigned &StartOffset, + unsigned SlashLoc, Token *Result); std::optional tryReadNamedUCN(unsigned &StartOffset, - unsigned SlashLoc, Token *Result); + unsigned SlashLoc, Token *Result); /// Read a universal character name. /// diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1016,7 +1016,7 @@ /// excluded conditional directives. It maps the source buffer pointer at /// the beginning of a skipped block, to the number of bytes that should be /// skipped. - llvm::DenseMap RecordedSkippedRanges; + llvm::DenseMap RecordedSkippedRanges; void updateOutOfDateIdentifier(IdentifierInfo &II) const; diff --git a/clang/lib/Format/.clang-format b/clang/lib/Format/.clang-format --- a/clang/lib/Format/.clang-format +++ b/clang/lib/Format/.clang-format @@ -1,4 +1,3 @@ BasedOnStyle: LLVM InsertBraces: true -LineEnding: LF RemoveBracesLLVM: true diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -138,8 +138,7 @@ FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), LangOpts(PP.getLangOpts()), LineComment(LangOpts.LineComment), IsFirstTimeLexingFile(IsFirstIncludeOfFile) { - InitLexer(InputFile.getBufferStart(), 0, - InputFile.getBufferSize()); + InitLexer(InputFile.getBufferStart(), 0, InputFile.getBufferSize()); resetExtendedTokenMode(); } @@ -152,7 +151,7 @@ bool IsFirstIncludeOfFile) : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment), IsFirstTimeLexingFile(IsFirstIncludeOfFile) { - InitLexer(BufStart, BufPtr-BufStart, BufEnd-BufStart); + InitLexer(BufStart, BufPtr - BufStart, BufEnd - BufStart); // We *are* in raw mode. 
LexingRawMode = true; @@ -208,7 +207,8 @@ const char *StrData = SM.getCharacterData(SpellingLoc); L->BufferStart = InputFile.getBufferStart(); - L->BufferOffset = StrData - InputFile.getBufferStart(); // FIXME: this is wrong + L->BufferOffset = + StrData - InputFile.getBufferStart(); // FIXME: this is wrong L->BufferSize = L->BufferOffset + TokLen; assert(L->BufferStart[L->BufferSize] == 0 && "Buffer is not nul terminated!"); @@ -604,7 +604,7 @@ unsigned MaxLineOffset = 0; if (MaxLines) { - const char* CurPtr = Buffer.begin(); + const char *CurPtr = Buffer.begin(); unsigned CurLine = 0; while (CurPtr != Buffer.end()) { char ch = *CurPtr++; @@ -1147,10 +1147,8 @@ /// getSourceLocation - Return a source location identifier for the specified /// offset in the current file. -SourceLocation Lexer::getSourceLocation(unsigned Loc, - unsigned TokLen) const { - assert(Loc <= BufferSize && - "Location out of range for this buffer!"); +SourceLocation Lexer::getSourceLocation(unsigned Loc, unsigned TokLen) const { + assert(Loc <= BufferSize && "Location out of range for this buffer!"); // In the normal case, we're just lexing from a simple file buffer, return // the file id from FileLoc with the offset specified. @@ -1202,12 +1200,13 @@ if (!Trigraphs) { if (L && !L->isLexingRawMode()) - L->Diag(CP-2-L->getBuffer().data(), diag::trigraph_ignored); + L->Diag(CP - 2 - L->getBuffer().data(), diag::trigraph_ignored); return 0; } if (L && !L->isLexingRawMode()) - L->Diag(CP-2-L->getBuffer().data(), diag::trigraph_converted) << StringRef(&Res, 1); + L->Diag(CP - 2 - L->getBuffer().data(), diag::trigraph_converted) + << StringRef(&Res, 1); return Res; } @@ -1338,29 +1337,31 @@ /// /// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should /// be updated to match. 
-char Lexer::getCharAndSizeSlow(unsigned Offset, unsigned &Size, - Token *Tok) { +char Lexer::getCharAndSizeSlow(unsigned Offset, unsigned &Size, Token *Tok) { // If we have a slash, look for an escaped newline. if (BufferStart[Offset] == '\\') { ++Size; ++Offset; Slash: // Common case, backslash-char where the char is not whitespace. - if (!isWhitespace(BufferStart[Offset])) return '\\'; + if (!isWhitespace(BufferStart[Offset])) + return '\\'; // See if we have optional whitespace characters between the slash and // newline. - if (unsigned EscapedNewLineSize = getEscapedNewLineSize(&BufferStart[Offset])) { + if (unsigned EscapedNewLineSize = + getEscapedNewLineSize(&BufferStart[Offset])) { // Remember that this token needs to be cleaned. if (Tok) Tok->setFlag(Token::NeedsCleaning); // Warn if there was whitespace between the backslash and newline. - if (BufferStart[Offset] != '\n' && BufferStart[Offset] != '\r' && Tok && !isLexingRawMode()) + if (BufferStart[Offset] != '\n' && BufferStart[Offset] != '\r' && Tok && + !isLexingRawMode()) Diag(Offset, diag::backslash_newline_space); // Found backslash. Parse the char after it. Size += EscapedNewLineSize; - Offset += EscapedNewLineSize; + Offset += EscapedNewLineSize; // Use slow version to accumulate a correct size field. return getCharAndSizeSlow(Offset, Size, Tok); @@ -1368,14 +1369,14 @@ // Otherwise, this is not an escaped newline, just return the slash. return '\\'; - } +} // If this is a trigraph, process it. - if (BufferStart[Offset] == '?' && BufferStart[Offset+1] == '?') { +if (BufferStart[Offset] == '?' && BufferStart[Offset + 1] == '?') { // If this is actually a legal trigraph (not something like "??x"), emit // a trigraph warning. If so, and if trigraphs are enabled, return it. - if (char C = DecodeTrigraphChar(&BufferStart[Offset + 2], Tok ? this : nullptr, - LangOpts.Trigraphs)) { + if (char C = DecodeTrigraphChar(&BufferStart[Offset + 2], + Tok ? 
this : nullptr, LangOpts.Trigraphs)) { // Remember that this token needs to be cleaned. if (Tok) Tok->setFlag(Token::NeedsCleaning); @@ -1384,7 +1385,7 @@ if (C == '\\') goto Slash; return C; } - } +} // If this is neither, return a single character. ++Size; @@ -1726,8 +1727,8 @@ } Result.setFlag(Token::HasUCN); - if ((UCNOffset - CurOffset == 6 && BufferStart[CurOffset+1] == 'u') || - (UCNOffset - CurOffset == 10 && BufferStart[CurOffset+1] == 'U')) + if ((UCNOffset - CurOffset == 6 && BufferStart[CurOffset + 1] == 'u') || + (UCNOffset - CurOffset == 10 && BufferStart[CurOffset + 1] == 'U')) CurOffset = UCNOffset; else while (CurOffset != UCNOffset) @@ -1736,13 +1737,12 @@ } bool Lexer::tryConsumeIdentifierUTF8Char(unsigned &CurOffset) { - const char* UnicodePtr = &BufferStart[CurOffset]; + const char *UnicodePtr = &BufferStart[CurOffset]; llvm::UTF32 CodePoint; llvm::ConversionResult Result = llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr, (const llvm::UTF8 *)&BufferStart[BufferSize], - &CodePoint, - llvm::strictConversion); + &CodePoint, llvm::strictConversion); if (Result != llvm::conversionOK) return false; @@ -1756,7 +1756,8 @@ !PP->isPreprocessedOutput()) diagnoseInvalidUnicodeCodepointInIdentifier( PP->getDiagnostics(), LangOpts, CodePoint, - makeCharRange(*this, CurOffset, UnicodePtr-BufferStart), /*IsFirst=*/false); + makeCharRange(*this, CurOffset, UnicodePtr - BufferStart), + /*IsFirst=*/false); // We got a unicode codepoint that is neither a space nor a // a valid identifier part. Carry on as if the codepoint was // valid for recovery purposes. 
@@ -1764,20 +1765,22 @@ if (IsExtension) diagnoseExtensionInIdentifier(PP->getDiagnostics(), CodePoint, makeCharRange(*this, CurPtr, UnicodePtr)); - maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, - makeCharRange(*this, CurOffset, UnicodePtr-BufferStart), - /*IsFirst=*/false); - maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint, - makeCharRange(*this, CurOffset, UnicodePtr-BufferStart)); + maybeDiagnoseIDCharCompat( + PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurOffset, UnicodePtr - BufferStart), + /*IsFirst=*/false); + maybeDiagnoseUTF8Homoglyph( + PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurOffset, UnicodePtr - BufferStart)); } - CurOffset = UnicodePtr-BufferStart; + CurOffset = UnicodePtr - BufferStart; return true; } bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C, unsigned CurOffset) { - bool IsExtension = false; + bool IsExtension = false; if (isAllowedInitiallyIDChar(C, LangOpts, IsExtension)) { if (!isLexingRawMode() && !ParsingPreprocessorDirective && !PP->isPreprocessedOutput()) { @@ -1910,7 +1913,8 @@ char C1 = Lexer::getCharAndSizeNoWarn(&BufferStart[Start], Size, LangOpts); if (C1 != '0') return false; - char C2 = Lexer::getCharAndSizeNoWarn(&BufferStart[Start + Size], Size, LangOpts); + char C2 = + Lexer::getCharAndSizeNoWarn(&BufferStart[Start + Size], Size, LangOpts); return (C2 == 'x' || C2 == 'X'); } @@ -1945,7 +1949,8 @@ if (!isHexaLiteral(BufferOffset, LangOpts)) IsHexFloat = false; else if (!LangOpts.CPlusPlus17 && - std::find(BufferStart + BufferOffset, BufferStart + CurOffset, '_') != BufferStart + CurOffset) + std::find(BufferStart + BufferOffset, BufferStart + CurOffset, + '_') != BufferStart + CurOffset) IsHexFloat = false; } if (IsHexFloat) @@ -1955,12 +1960,13 @@ // If we have a digit separator, continue. 
if (C == '\'' && (LangOpts.CPlusPlus14 || LangOpts.C2x)) { unsigned NextSize; - char Next = getCharAndSizeNoWarn(&BufferStart[CurOffset + Size], NextSize, LangOpts); + char Next = getCharAndSizeNoWarn(&BufferStart[CurOffset + Size], NextSize, + LangOpts); if (isAsciiIdentifierContinue(Next)) { if (!isLexingRawMode()) Diag(CurOffset, LangOpts.CPlusPlus - ? diag::warn_cxx11_compat_digit_separator - : diag::warn_c2x_compat_digit_separator); + ? diag::warn_cxx11_compat_digit_separator + : diag::warn_c2x_compat_digit_separator); CurOffset = ConsumeChar(CurOffset, Size, Result); CurOffset = ConsumeChar(CurOffset, NextSize, Result); return LexNumericConstant(Result, CurOffset); @@ -1983,7 +1989,7 @@ /// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes /// in C++11, or warn on a ud-suffix in C++98. unsigned Lexer::LexUDSuffix(Token &Result, unsigned CurOffset, - bool IsStringLiteral) { + bool IsStringLiteral) { assert(LangOpts.CPlusPlus); // Maximally munch an identifier. @@ -2005,7 +2011,7 @@ Diag(CurOffset, C == '_' ? diag::warn_cxx11_compat_user_defined_literal : diag::warn_cxx11_compat_reserved_user_defined_literal) - << FixItHint::CreateInsertion(getSourceLocation(CurOffset), " "); + << FixItHint::CreateInsertion(getSourceLocation(CurOffset), " "); return CurOffset; } @@ -2028,7 +2034,8 @@ unsigned Chars = 1; while (true) { unsigned NextSize; - char Next = getCharAndSizeNoWarn(&BufferStart[CurOffset] + Consumed, NextSize, LangOpts); + char Next = getCharAndSizeNoWarn(&BufferStart[CurOffset] + Consumed, + NextSize, LangOpts); if (!isAsciiIdentifierContinue(Next)) { // End of suffix. Check whether this is on the allowed list. const StringRef CompleteSuffix(Buffer, Chars); @@ -2049,8 +2056,8 @@ if (!IsUDSuffix) { if (!isLexingRawMode()) Diag(CurOffset, LangOpts.MSVCCompat - ? diag::ext_ms_reserved_user_defined_literal - : diag::ext_reserved_user_defined_literal) + ? 
diag::ext_ms_reserved_user_defined_literal + : diag::ext_reserved_user_defined_literal) << FixItHint::CreateInsertion(getSourceLocation(CurOffset), " "); return CurOffset; } @@ -2084,8 +2091,9 @@ (Kind == tok::utf8_string_literal || Kind == tok::utf16_string_literal || Kind == tok::utf32_string_literal)) - Diag(BufferOffset, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal - : diag::warn_c99_compat_unicode_literal); + Diag(BufferOffset, LangOpts.CPlusPlus + ? diag::warn_cxx98_compat_unicode_literal + : diag::warn_c99_compat_unicode_literal); char C = getAndAdvanceChar(CurOffset, Result); while (C != '"') { @@ -2094,18 +2102,19 @@ if (C == '\\') C = getAndAdvanceChar(CurOffset, Result); - if (C == '\n' || C == '\r' || // Newline. - (C == 0 && CurOffset-1 == BufferSize)) { // End of file. + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurOffset - 1 == BufferSize)) { // End of file. if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferOffset, diag::ext_unterminated_char_or_string) << 1; - FormTokenWithChars(Result, CurOffset-1, tok::unknown); + FormTokenWithChars(Result, CurOffset - 1, tok::unknown); return true; } if (C == 0) { - if (isCodeCompletionPoint(CurOffset-1)) { + if (isCodeCompletionPoint(CurOffset - 1)) { if (ParsingFilename) - codeCompleteIncludedFile(AfterQuote, CurOffset - 1, /*IsAngled=*/false); + codeCompleteIncludedFile(AfterQuote, CurOffset - 1, + /*IsAngled=*/false); else PP->CodeCompleteNaturalLanguage(); FormTokenWithChars(Result, CurOffset - 1, tok::unknown); @@ -2113,7 +2122,7 @@ return true; } - NulCharacter = CurOffset-1; + NulCharacter = CurOffset - 1; } C = getAndAdvanceChar(CurOffset, Result); } @@ -2147,7 +2156,8 @@ unsigned PrefixLen = 0; - while (PrefixLen != 16 && isRawStringDelimBody(BufferStart[CurOffset + PrefixLen])) + while (PrefixLen != 16 && + isRawStringDelimBody(BufferStart[CurOffset + PrefixLen])) ++PrefixLen; // If the last character was not a '(', then we didn't lex a valid delimiter. 
@@ -2158,7 +2168,7 @@ Diag(PrefixEnd, diag::err_raw_delim_too_long); } else { Diag(PrefixEnd, diag::err_invalid_char_raw_delim) - << StringRef(BufferStart + PrefixEnd, 1); + << StringRef(BufferStart + PrefixEnd, 1); } } @@ -2170,7 +2180,7 @@ if (C == '"') break; - if (C == 0 && CurOffset-1 == BufferSize) { + if (C == 0 && CurOffset - 1 == BufferSize) { --CurOffset; break; } @@ -2189,15 +2199,17 @@ if (C == ')') { // Check for prefix match and closing quote. - if (strncmp(&BufferStart[CurOffset], &BufferStart[Prefix], PrefixLen) == 0 && BufferStart[CurOffset + PrefixLen] == '"') { + if (strncmp(&BufferStart[CurOffset], &BufferStart[Prefix], PrefixLen) == + 0 && + BufferStart[CurOffset + PrefixLen] == '"') { CurOffset += PrefixLen + 1; // skip over prefix and '"' break; } - } else if (C == 0 && CurOffset-1 == BufferSize) { // End of file. + } else if (C == 0 && CurOffset - 1 == BufferSize) { // End of file. if (!isLexingRawMode()) Diag(BufferOffset, diag::err_unterminated_raw_string) - << StringRef(BufferStart + Prefix, PrefixLen); - FormTokenWithChars(Result, CurOffset-1, tok::unknown); + << StringRef(BufferStart + Prefix, PrefixLen); + FormTokenWithChars(Result, CurOffset - 1, tok::unknown); return true; } } @@ -2226,7 +2238,7 @@ if (C == '\\') C = getAndAdvanceChar(CurOffset, Result); - if (isVerticalWhitespace(C) || // Newline. + if (isVerticalWhitespace(C) || // Newline. (C == 0 && (CurOffset - 1 == BufferSize))) { // End of file. // If the filename is unterminated, then it must just be a lone < // character. Return this as such. 
@@ -2236,12 +2248,13 @@ if (C == 0) { if (isCodeCompletionPoint(CurOffset - 1)) { - codeCompleteIncludedFile(AfterLessPos, CurOffset - 1, /*IsAngled=*/true); + codeCompleteIncludedFile(AfterLessPos, CurOffset - 1, + /*IsAngled=*/true); cutOffLexing(); FormTokenWithChars(Result, CurOffset - 1, tok::unknown); return true; } - NulCharacter = CurOffset-1; + NulCharacter = CurOffset - 1; } C = getAndAdvanceChar(CurOffset, Result); } @@ -2258,8 +2271,7 @@ } void Lexer::codeCompleteIncludedFile(unsigned PathStart, - unsigned CompletionPoint, - bool IsAngled) { + unsigned CompletionPoint, bool IsAngled) { // Completion only applies to the filename, after the last slash. StringRef PartialPath(BufferStart + PathStart, CompletionPoint - PathStart); llvm::StringRef SlashChars = LangOpts.MSVCCompat ? "/\\" : "/"; @@ -2269,8 +2281,8 @@ unsigned StartOfFilename = (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1; // Code completion filter range is the filename only, up to completion point. - PP->setCodeCompletionIdentifierInfo(&PP->getIdentifierTable().get( - StringRef(BufferStart + StartOfFilename, CompletionPoint - StartOfFilename))); + PP->setCodeCompletionIdentifierInfo(&PP->getIdentifierTable().get(StringRef( + BufferStart + StartOfFilename, CompletionPoint - StartOfFilename))); // We should replace the characters up to the closing quote or closest slash, // if any. while (CompletionPoint < BufferSize) { @@ -2284,9 +2296,8 @@ break; } - PP->setCodeCompletionTokenRange( - FileLoc.getLocWithOffset(StartOfFilename), - FileLoc.getLocWithOffset(CompletionPoint)); + PP->setCodeCompletionTokenRange(FileLoc.getLocWithOffset(StartOfFilename), + FileLoc.getLocWithOffset(CompletionPoint)); PP->CodeCompleteIncludedFile(Dir, IsAngled); } @@ -2300,8 +2311,8 @@ if (!isLexingRawMode()) { if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant) Diag(BufferOffset, LangOpts.CPlusPlus - ? 
diag::warn_cxx98_compat_unicode_literal - : diag::warn_c99_compat_unicode_literal); + ? diag::warn_cxx98_compat_unicode_literal + : diag::warn_c99_compat_unicode_literal); else if (Kind == tok::utf8_char_constant) Diag(BufferOffset, diag::warn_cxx14_compat_u8_character_literal); } @@ -2319,23 +2330,23 @@ if (C == '\\') C = getAndAdvanceChar(CurOffset, Result); - if (C == '\n' || C == '\r' || // Newline. - (C == 0 && CurOffset-1 == BufferSize)) { // End of file. + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurOffset - 1 == BufferSize)) { // End of file. if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) Diag(BufferOffset, diag::ext_unterminated_char_or_string) << 0; - FormTokenWithChars(Result, CurOffset-1, tok::unknown); + FormTokenWithChars(Result, CurOffset - 1, tok::unknown); return true; } if (C == 0) { - if (isCodeCompletionPoint(CurOffset-1)) { + if (isCodeCompletionPoint(CurOffset - 1)) { PP->CodeCompleteNaturalLanguage(); - FormTokenWithChars(Result, CurOffset-1, tok::unknown); + FormTokenWithChars(Result, CurOffset - 1, tok::unknown); cutOffLexing(); return true; } - NulCharacter = CurOffset-1; + NulCharacter = CurOffset - 1; } C = getAndAdvanceChar(CurOffset, Result); } @@ -2362,7 +2373,7 @@ bool Lexer::SkipWhitespace(Token &Result, unsigned CurOffset, bool &TokAtPhysicalStartOfLine) { // Whitespace - Skip it, then return the token after the whitespace. - bool SawNewline = isVerticalWhitespace(BufferStart[CurOffset-1]); + bool SawNewline = isVerticalWhitespace(BufferStart[CurOffset - 1]); unsigned char Char = BufferStart[CurOffset]; @@ -2410,7 +2421,7 @@ } // If this isn't immediately after a newline, there is leading space. 
- char PrevChar = BufferStart[CurOffset-1]; + char PrevChar = BufferStart[CurOffset - 1]; bool HasLeadingSpace = !isVerticalWhitespace(PrevChar); Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace); @@ -2418,10 +2429,12 @@ Result.setFlag(Token::StartOfLine); TokAtPhysicalStartOfLine = true; - if (NewLineOffset.hasValue() && lastNewLine.hasValue() && *NewLineOffset != *lastNewLine && PP) { + if (NewLineOffset.hasValue() && lastNewLine.hasValue() && + *NewLineOffset != *lastNewLine && PP) { if (auto *Handler = PP->getEmptylineHandler()) - Handler->HandleEmptyline(SourceRange(getSourceLocation(*NewLineOffset + 1), - getSourceLocation(*lastNewLine))); + Handler->HandleEmptyline( + SourceRange(getSourceLocation(*NewLineOffset + 1), + getSourceLocation(*lastNewLine))); } } @@ -2472,8 +2485,9 @@ } if (!isASCII(C)) { - unsigned Length = llvm::getUTF8SequenceSize((const llvm::UTF8 *)&BufferStart[CurOffset], - (const llvm::UTF8 *)&BufferStart[BufferSize]); + unsigned Length = llvm::getUTF8SequenceSize( + (const llvm::UTF8 *)&BufferStart[CurOffset], + (const llvm::UTF8 *)&BufferStart[BufferSize]); if (Length == 0) { if (!UnicodeDecodingAlreadyDiagnosed && !isLexingRawMode()) Diag(CurOffset, diag::warn_invalid_utf8_in_comment); @@ -2489,9 +2503,10 @@ unsigned NextLine = CurOffset; if (C != 0) { // We found a newline, see if it's escaped. - unsigned EscapeOffset = CurOffset-1; + unsigned EscapeOffset = CurOffset - 1; bool HasSpace = false; - while (isHorizontalWhitespace(BufferStart[EscapeOffset])) { // Skip whitespace. + while (isHorizontalWhitespace( + BufferStart[EscapeOffset])) { // Skip whitespace. --EscapeOffset; HasSpace = true; } @@ -2499,10 +2514,11 @@ if (BufferStart[EscapeOffset] == '\\') // Escaped newline. CurOffset = EscapeOffset; - else if (BufferStart[EscapeOffset] == '/' && BufferStart[EscapeOffset-1] == '?' && - BufferStart[EscapeOffset-2] == '?' && LangOpts.Trigraphs) + else if (BufferStart[EscapeOffset] == '/' && + BufferStart[EscapeOffset - 1] == '?' 
&& + BufferStart[EscapeOffset - 2] == '?' && LangOpts.Trigraphs) // Trigraph-escaped newline. - CurOffset = EscapeOffset-2; + CurOffset = EscapeOffset - 2; else break; // This is a newline, we're done. @@ -2523,7 +2539,7 @@ // If we only read only one character, then no special handling is needed. // We're done and can skip forward to the newline. - if (C != 0 && CurOffset == OldOffset+1) { + if (C != 0 && CurOffset == OldOffset + 1) { CurOffset = NextLine; break; } @@ -2539,14 +2555,15 @@ // line is also a // comment, but has spaces, don't emit a diagnostic. if (isWhitespace(C)) { unsigned ForwardOffset = CurOffset; - while (isWhitespace(BufferStart[ForwardOffset])) // Skip whitespace. + while (isWhitespace(BufferStart[ForwardOffset])) // Skip whitespace. ++ForwardOffset; - if (BufferStart[ForwardOffset] == '/' && BufferStart[ForwardOffset+1] == '/') + if (BufferStart[ForwardOffset] == '/' && + BufferStart[ForwardOffset + 1] == '/') break; } if (!isLexingRawMode()) - Diag(OldOffset-1, diag::ext_multi_line_line_comment); + Diag(OldOffset - 1, diag::ext_multi_line_line_comment); break; } } @@ -2556,7 +2573,7 @@ break; } - if (C == '\0' && isCodeCompletionPoint(CurOffset-1)) { + if (C == '\0' && isCodeCompletionPoint(CurOffset - 1)) { PP->CodeCompleteNaturalLanguage(); cutOffLexing(); return false; @@ -2629,7 +2646,7 @@ /// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline /// character (either \\n or \\r) is part of an escaped newline sequence. Issue /// a diagnostic if so. We know that the newline is inside of a block comment. -static bool isEndOfBlockCommentWithEscapedNewLine(const char* CurPtr, Lexer *L, +static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs) { assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); @@ -2683,20 +2700,23 @@ // ignore this * character. 
if (!Trigraphs) { if (!L->isLexingRawMode()) - L->Diag(TrigraphPos - L->getBuffer().data(), diag::trigraph_ignored_block_comment); + L->Diag(TrigraphPos - L->getBuffer().data(), + diag::trigraph_ignored_block_comment); return false; } if (!L->isLexingRawMode()) - L->Diag(TrigraphPos- L->getBuffer().data(), diag::trigraph_ends_block_comment); + L->Diag(TrigraphPos - L->getBuffer().data(), + diag::trigraph_ends_block_comment); } // Warn about having an escaped newline between the */ characters. if (!L->isLexingRawMode()) - L->Diag(CurPtr+ 1- L->getBuffer().data(), diag::escaped_newline_block_comment_end); + L->Diag(CurPtr + 1 - L->getBuffer().data(), + diag::escaped_newline_block_comment_end); // If there was space between the backslash and newline, warn about it. if (SpacePos && !L->isLexingRawMode()) - L->Diag(SpacePos- L->getBuffer().data(), diag::backslash_newline_space); + L->Diag(SpacePos - L->getBuffer().data(), diag::backslash_newline_space); return true; } @@ -2730,7 +2750,7 @@ unsigned CharSize; unsigned char C = getCharAndSize(CurOffset, CharSize); CurOffset += CharSize; - if (C == 0 && CurOffset == BufferSize+1) { + if (C == 0 && CurOffset == BufferSize + 1) { if (!isLexingRawMode()) Diag(BufferOffset, diag::err_unterminated_block_comment); --CurOffset; @@ -2775,13 +2795,14 @@ #ifdef __SSE2__ __m128i Slashes = _mm_set1_epi8('/'); while (CurOffset + 16 < BufferSize) { - int Mask = _mm_movemask_epi8(*(const __m128i *)(BufferStart + CurOffset)); + int Mask = + _mm_movemask_epi8(*(const __m128i *)(BufferStart + CurOffset)); if (LLVM_UNLIKELY(Mask != 0)) { goto MultiByteUTF8; } // look for slashes - int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)(BufferStart + CurOffset), - Slashes)); + int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8( + *(const __m128i *)(BufferStart + CurOffset), Slashes)); if (cmp != 0) { // Adjust the pointer to point directly after the first slash. 
It's // not necessary to set C here, it will be overwritten at the end of @@ -2813,14 +2834,14 @@ while (CurOffset + 16 < BufferSize) { bool HasNonASCII = false; for (unsigned I = 0; I < 16; ++I) - HasNonASCII |= !isASCII(BufferStart[CurOffset+I]); + HasNonASCII |= !isASCII(BufferStart[CurOffset + I]); if (LLVM_UNLIKELY(HasNonASCII)) goto MultiByteUTF8; bool HasSlash = false; for (unsigned I = 0; I < 16; ++I) - HasSlash |= BufferStart[CurOffset+I] == '/'; + HasSlash |= BufferStart[CurOffset + I] == '/'; if (HasSlash) break; CurOffset += 16; @@ -2844,7 +2865,8 @@ // CurPtr is 1 code unit past C, so to decode // the codepoint, we need to read from the previous position. unsigned Length = llvm::getUTF8SequenceSize( - (const llvm::UTF8 *)(BufferStart + CurOffset) - 1, (const llvm::UTF8 *)(BufferStart + BufferSize)); + (const llvm::UTF8 *)(BufferStart + CurOffset) - 1, + (const llvm::UTF8 *)(BufferStart + BufferSize)); if (Length == 0) { if (!UnicodeDecodingAlreadyDiagnosed && !isLexingRawMode()) Diag(CurOffset - 1, diag::warn_invalid_utf8_in_comment); @@ -2858,25 +2880,27 @@ if (C == '/') { FoundSlash: - if (BufferStart[CurOffset-2] == '*') // We found the final */. We're done! + if (BufferStart[CurOffset - 2] == + '*') // We found the final */. We're done! break; - if ((BufferStart[CurOffset-2] == '\n' || BufferStart[CurOffset-2] == '\r')) { - if (isEndOfBlockCommentWithEscapedNewLine(&BufferStart[CurOffset - 2], this, - LangOpts.Trigraphs)) { + if ((BufferStart[CurOffset - 2] == '\n' || + BufferStart[CurOffset - 2] == '\r')) { + if (isEndOfBlockCommentWithEscapedNewLine(&BufferStart[CurOffset - 2], + this, LangOpts.Trigraphs)) { // We found the final */, though it had an escaped newline between the // * and /. We're done! break; } } - if (BufferStart[CurOffset] == '*' && BufferStart[CurOffset+1] != '/') { + if (BufferStart[CurOffset] == '*' && BufferStart[CurOffset + 1] != '/') { // If this is a /* inside of the comment, emit a warning. 
Don't do this // if this is a /*/, which will end the comment. This misses cases with // embedded escaped newlines, but oh well. if (!isLexingRawMode()) - Diag(CurOffset-1, diag::warn_nested_block_comment); + Diag(CurOffset - 1, diag::warn_nested_block_comment); } - } else if (C == 0 && CurOffset == BufferSize+1) { + } else if (C == 0 && CurOffset == BufferSize + 1) { if (!isLexingRawMode()) Diag(BufferOffset, diag::err_unterminated_block_comment); // Note: the user probably forgot a */. We could continue immediately @@ -2893,27 +2917,27 @@ BufferOffset = CurOffset; return false; - } else if (C == '\0' && isCodeCompletionPoint(CurOffset-1)) { + } else if (C == '\0' && isCodeCompletionPoint(CurOffset - 1)) { PP->CodeCompleteNaturalLanguage(); cutOffLexing(); return false; - } + } - C = BufferStart[CurOffset++]; + C = BufferStart[CurOffset++]; } // Notify comment handlers about the comment unless we're in a #if 0 block. if (PP && !isLexingRawMode() && PP->HandleComment(Result, SourceRange(getSourceLocation(BufferOffset), getSourceLocation(CurOffset)))) { - BufferOffset = CurOffset; - return true; // A token has to be returned. + BufferOffset = CurOffset; + return true; // A token has to be returned. } // If we are returning comments as tokens, return this comment as a token. if (inKeepCommentMode()) { - FormTokenWithChars(Result, CurOffset, tok::comment); - return true; + FormTokenWithChars(Result, CurOffset, tok::comment); + return true; } // It is common for the tokens immediately after a /**/ comment to be @@ -2921,8 +2945,8 @@ // efficiently now. This is safe even in KeepWhitespaceMode because we would // have already returned above with the comment as a token. if (isHorizontalWhitespace(BufferStart[CurOffset])) { - SkipWhitespace(Result, CurOffset+1, TokAtPhysicalStartOfLine); - return false; + SkipWhitespace(Result, CurOffset + 1, TokAtPhysicalStartOfLine); + return false; } // Otherwise, just return so that the next character will be lexed as a token. 
@@ -2946,16 +2970,16 @@ // CurPtr - Cache BufferPtr in an automatic variable. unsigned CurOffset = BufferOffset; while (true) { - char Char = getAndAdvanceChar(CurOffset, Tmp); - switch (Char) { - default: + char Char = getAndAdvanceChar(CurOffset, Tmp); + switch (Char) { + default: if (Result) Result->push_back(Char); break; case 0: // Null. // Found end of file? - if (CurOffset-1 != BufferSize) { - if (isCodeCompletionPoint(CurOffset-1)) { + if (CurOffset - 1 != BufferSize) { + if (isCodeCompletionPoint(CurOffset - 1)) { PP->CodeCompleteNaturalLanguage(); cutOffLexing(); return; @@ -2971,8 +2995,8 @@ case '\r': case '\n': // Okay, we found the end of the line. First, back up past the \0, \r, \n. - assert(BufferStart[CurOffset-1] == Char && "Trigraphs for newline?"); - BufferOffset = CurOffset-1; + assert(BufferStart[CurOffset - 1] == Char && "Trigraphs for newline?"); + BufferOffset = CurOffset - 1; // Next, lex the character, which should handle the EOD transition. Lex(Tmp); @@ -3040,7 +3064,8 @@ // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue // a pedwarn. - if (CurOffset != 0 && (BufferStart[CurOffset-1] != '\n' && BufferStart[CurOffset-1] != '\r')) { + if (CurOffset != 0 && (BufferStart[CurOffset - 1] != '\n' && + BufferStart[CurOffset - 1] != '\r')) { DiagnosticsEngine &Diags = PP->getDiagnostics(); SourceLocation EndLoc = getSourceLocation(BufferSize); unsigned DiagID; @@ -3058,8 +3083,7 @@ DiagID = diag::ext_no_newline_eof; } - Diag(BufferSize, DiagID) - << FixItHint::CreateInsertion(EndLoc, "\n"); + Diag(BufferSize, DiagID) << FixItHint::CreateInsertion(EndLoc, "\n"); } BufferOffset = CurOffset; @@ -3113,7 +3137,7 @@ } /// Find the end of a version control conflict marker. -static const char *FindConflictEnd(const char* CurPtr, const char *BufferEnd, +static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK) { const char *Terminator = CMK == CMK_Perforce ? 
"<<<<\n" : ">>>>>>>"; size_t TermLen = CMK == CMK_Perforce ? 5 : 7; @@ -3138,13 +3162,15 @@ /// if not. bool Lexer::IsStartOfConflictMarker(unsigned CurOffset) { // Only a conflict marker if it starts at the beginning of a line. - if (CurOffset != 0 && - BufferStart[CurOffset-1] != '\n' && BufferStart[CurOffset-1] != '\r') + if (CurOffset != 0 && BufferStart[CurOffset - 1] != '\n' && + BufferStart[CurOffset - 1] != '\r') return false; // Check to see if we have <<<<<<< or >>>>. - if (!StringRef(BufferStart+CurOffset, BufferSize - CurOffset).startswith("<<<<<<<") && - !StringRef(BufferStart+CurOffset, BufferSize - CurOffset).startswith(">>>> ")) + if (!StringRef(BufferStart + CurOffset, BufferSize - CurOffset) + .startswith("<<<<<<<") && + !StringRef(BufferStart + CurOffset, BufferSize - CurOffset) + .startswith(">>>> ")) return false; // If we have a situation where we don't care about conflict markers, ignore @@ -3152,11 +3178,13 @@ if (CurrentConflictMarkerState || isLexingRawMode()) return false; - ConflictMarkerKind Kind = BufferStart[CurOffset] == '<' ? CMK_Normal : CMK_Perforce; + ConflictMarkerKind Kind = + BufferStart[CurOffset] == '<' ? CMK_Normal : CMK_Perforce; // Check to see if there is an ending marker somewhere in the buffer at the // start of a line to terminate this conflict marker. - if (FindConflictEnd(&BufferStart[CurOffset], &BufferStart[BufferSize], Kind)) { + if (FindConflictEnd(&BufferStart[CurOffset], &BufferStart[BufferSize], + Kind)) { // We found a match. We are really in a conflict marker. // Diagnose this, and ignore to the end of line. Diag(CurOffset, diag::err_conflict_marker); @@ -3182,8 +3210,8 @@ /// the line. This returns true if it is a conflict marker and false if not. bool Lexer::HandleEndOfConflictMarker(unsigned CurOffset) { // Only a conflict marker if it starts at the beginning of a line. 
- if (CurOffset != 0 && - BufferStart[CurOffset-1] != '\n' && BufferStart[CurOffset-1] != '\r') + if (CurOffset != 0 && BufferStart[CurOffset - 1] != '\n' && + BufferStart[CurOffset - 1] != '\r') return false; // If we have a situation where we don't care about conflict markers, ignore @@ -3199,12 +3227,14 @@ // If we do have it, search for the end of the conflict marker. This could // fail if it got skipped with a '#if 0' or something. Note that CurPtr might // be the end of conflict marker. - if (const char *End = FindConflictEnd(BufferStart + CurOffset, BufferStart + BufferSize, - CurrentConflictMarkerState)) { + if (const char *End = + FindConflictEnd(BufferStart + CurOffset, BufferStart + BufferSize, + CurrentConflictMarkerState)) { CurOffset = End - BufferStart; // Skip ahead to the end of line. - while (CurOffset != BufferSize && BufferStart[CurOffset] != '\r' && BufferStart[CurOffset] != '\n') + while (CurOffset != BufferSize && BufferStart[CurOffset] != '\r' && + BufferStart[CurOffset] != '\n') ++CurOffset; BufferOffset = CurOffset; @@ -3217,7 +3247,7 @@ return false; } -static const char *findPlaceholderEnd(const char* CurPtr, +static const char *findPlaceholderEnd(const char *CurPtr, const char *BufferEnd) { if (CurPtr == BufferEnd) return nullptr; @@ -3230,10 +3260,12 @@ } bool Lexer::lexEditorPlaceholder(Token &Result, unsigned CurOffset) { - assert(BufferStart[CurOffset-1] == '<' && BufferStart[CurOffset] == '#' && "Not a placeholder!"); + assert(BufferStart[CurOffset - 1] == '<' && BufferStart[CurOffset] == '#' && + "Not a placeholder!"); if (!PP || !PP->getPreprocessorOpts().LexEditorPlaceholders || LexingRawMode) return false; - const char *End = findPlaceholderEnd(BufferStart + CurOffset + 1, BufferStart + BufferSize); + const char *End = + findPlaceholderEnd(BufferStart + CurOffset + 1, BufferStart + BufferSize); if (!End) return false; const char *Start = BufferStart + CurOffset - 1; @@ -3258,8 +3290,8 @@ } std::optional 
Lexer::tryReadNumericUCN(unsigned &StartOffset, - unsigned SlashLoc, - Token *Result) { + unsigned SlashLoc, + Token *Result) { unsigned CharSize; char Kind = getCharAndSize(StartOffset, CharSize); assert((Kind == 'u' || Kind == 'U') && "expected a UCN"); @@ -3282,7 +3314,7 @@ } unsigned CurOffset = StartOffset + CharSize; - unsigned KindLoc = CurOffset-1; + unsigned KindLoc = CurOffset - 1; uint32_t CodePoint = 0; while (Count != NumHexDigits || Delimited) { @@ -3305,7 +3337,7 @@ break; if (Diagnose) Diag(SlashLoc, diag::warn_delimited_ucn_incomplete) - << StringRef(BufferStart+KindLoc, 1); + << StringRef(BufferStart + KindLoc, 1); return llvm::None; } @@ -3323,11 +3355,12 @@ if (Count == 0) { if (Diagnose) - if (Delimited && Kind == 'U') { - if (Diagnose) - Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(BufferStart+KindLoc, 1); - return llvm::None; - } + if (Delimited && Kind == 'U') { + if (Diagnose) + Diag(SlashLoc, diag::err_hex_escape_no_digits) + << StringRef(BufferStart + KindLoc, 1); + return llvm::None; + } if (!Delimited && Count != NumHexDigits) { if (Diagnose) { @@ -3344,8 +3377,8 @@ if (Delimited && PP) { Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b - ? diag::warn_cxx2b_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) + ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) << /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0); } @@ -3365,9 +3398,8 @@ return CodePoint; } -llvm::Optional Lexer::tryReadNamedUCN(unsigned &StartOffset, - unsigned SlashLoc, - Token *Result) { +llvm::Optional Lexer::tryReadNamedUCN( + unsigned &StartOffset, unsigned SlashLoc, Token *Result) { unsigned CharSize; bool Diagnose = Result && !isLexingRawMode(); @@ -3404,7 +3436,7 @@ if (Diagnose) Diag(SlashLoc, FoundEndDelimiter ? 
diag::warn_delimited_ucn_empty : diag::warn_delimited_ucn_incomplete) - << StringRef(BufferStart+KindLoc, 1); + << StringRef(BufferStart + KindLoc, 1); return llvm::None; } @@ -3432,8 +3464,8 @@ if (Diagnose && Match) Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b - ? diag::warn_cxx2b_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) + ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) << /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0); // If no diagnostic has been emitted yet, likely because we are doing a @@ -3448,7 +3480,7 @@ // If the UCN contains either a trigraph or a line splicing, // we need to call getAndAdvanceChar again to set the appropriate flags // on Result. - if (CurOffset - StartOffset == (ptrdiff_t)(Buffer.size() + )) + if (CurOffset - StartOffset == (ptrdiff_t)(Buffer.size() +)) StartOffset = CurOffset; else while (StartOffset != CurOffset) @@ -3522,12 +3554,12 @@ return CodePoint; } -bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C, +bool Lexer::CheckUnicodeWhitespace(Token & Result, uint32_t C, unsigned CurOffset) { if (!isLexingRawMode() && !PP->isPreprocessedOutput() && isUnicodeWhitespace(C)) { Diag(BufferOffset, diag::ext_unicode_whitespace) - << makeCharRange(*this, BufferOffset, CurOffset); + << makeCharRange(*this, BufferOffset, CurOffset); Result.setFlag(Token::LeadingSpace); return true; @@ -3618,11 +3650,11 @@ switch (Char) { case 0: // Null. // Found end of file? - if (CurOffset-1 == BufferSize) - return LexEndOfFile(Result, CurOffset-1); + if (CurOffset - 1 == BufferSize) + return LexEndOfFile(Result, CurOffset - 1); // Check if we are performing code completion. - if (isCodeCompletionPoint(CurOffset-1)) { + if (isCodeCompletionPoint(CurOffset - 1)) { // Return the code-completion token. 
Result.startToken(); FormTokenWithChars(Result, CurOffset, tok::code_completion); @@ -3630,7 +3662,7 @@ } if (!isLexingRawMode()) - Diag(CurOffset-1, diag::null_in_file); + Diag(CurOffset - 1, diag::null_in_file); Result.setFlag(Token::LeadingSpace); if (SkipWhitespace(Result, CurOffset, TokAtPhysicalStartOfLine)) return true; // KeepWhitespaceMode @@ -3643,8 +3675,8 @@ // If we're in Microsoft extensions mode, treat this as end of file. if (LangOpts.MicrosoftExt) { if (!isLexingRawMode()) - Diag(CurOffset-1, diag::ext_ctrl_z_eof_microsoft); - return LexEndOfFile(Result, CurOffset-1); + Diag(CurOffset - 1, diag::ext_ctrl_z_eof_microsoft); + return LexEndOfFile(Result, CurOffset - 1); } // If Microsoft extensions are disabled, this is just random garbage. @@ -3698,13 +3730,15 @@ // If the next token is obviously a // or /* */ comment, skip it efficiently // too (without going through the big switch stmt). - if (BufferStart[CurOffset] == '/' && BufferStart[CurOffset+1] == '/' && !inKeepCommentMode() && - LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { - if (SkipLineComment(Result, CurOffset+2, TokAtPhysicalStartOfLine)) + if (BufferStart[CurOffset] == '/' && BufferStart[CurOffset + 1] == '/' && + !inKeepCommentMode() && LineComment && + (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { + if (SkipLineComment(Result, CurOffset + 2, TokAtPhysicalStartOfLine)) return true; // There is a token to return. goto SkipIgnoredUnits; - } else if (BufferStart[CurOffset] == '/' && BufferStart[CurOffset+1] == '*' && !inKeepCommentMode()) { - if (SkipBlockComment(Result, CurOffset+2, TokAtPhysicalStartOfLine)) + } else if (BufferStart[CurOffset] == '/' && + BufferStart[CurOffset + 1] == '*' && !inKeepCommentMode()) { + if (SkipBlockComment(Result, CurOffset + 2, TokAtPhysicalStartOfLine)) return true; // There is a token to return. 
goto SkipIgnoredUnits; } else if (isHorizontalWhitespace(BufferStart[CurOffset])) { @@ -3745,24 +3779,27 @@ // UTF-16 raw string literal if (Char == 'R' && LangOpts.CPlusPlus11 && getCharAndSize(CurOffset + SizeTmp, SizeTmp2) == '"') - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result), - tok::utf16_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), SizeTmp2, + Result), + tok::utf16_string_literal); if (Char == '8') { char Char2 = getCharAndSize(CurOffset + SizeTmp, SizeTmp2); // UTF-8 string literal if (Char2 == '"') - return LexStringLiteral(Result, - ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result), - tok::utf8_string_literal); + return LexStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), SizeTmp2, + Result), + tok::utf8_string_literal); if (Char2 == '\'' && (LangOpts.CPlusPlus17 || LangOpts.C2x)) return LexCharConstant( - Result, ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result), + Result, + ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), SizeTmp2, + Result), tok::utf8_char_constant); if (Char2 == 'R' && LangOpts.CPlusPlus11) { @@ -3770,11 +3807,13 @@ char Char3 = getCharAndSize(CurOffset + SizeTmp + SizeTmp2, SizeTmp3); // UTF-8 raw string literal if (Char3 == '"') { - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result), - SizeTmp3, Result), - tok::utf8_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar( + ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), + SizeTmp2, Result), + SizeTmp3, Result), + tok::utf8_string_literal); } } } @@ -3803,10 +3842,11 @@ // UTF-32 raw string literal if (Char == 'R' && LangOpts.CPlusPlus11 && getCharAndSize(CurOffset + SizeTmp, SizeTmp2) == '"') - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - 
SizeTmp2, Result), - tok::utf32_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), SizeTmp2, + Result), + tok::utf32_string_literal); } // treat U like the start of an identifier. @@ -3841,10 +3881,11 @@ // Wide raw string literal. if (LangOpts.CPlusPlus11 && Char == 'R' && getCharAndSize(CurOffset + SizeTmp, SizeTmp2) == '"') - return LexRawStringLiteral(Result, - ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result), - tok::wide_string_literal); + return LexRawStringLiteral( + Result, + ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), SizeTmp2, + Result), + tok::wide_string_literal); // Wide character constant. if (Char == '\'') @@ -3870,7 +3911,7 @@ case '$': // $ in identifiers. if (LangOpts.DollarIdents) { if (!isLexingRawMode()) - Diag(CurOffset-1, diag::ext_dollar_in_identifier); + Diag(CurOffset - 1, diag::ext_dollar_in_identifier); // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); return LexIdentifierContinue(Result, CurOffset); @@ -3921,15 +3962,16 @@ // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexNumericConstant(Result, ConsumeChar(CurOffset, SizeTmp, Result)); + return LexNumericConstant(Result, + ConsumeChar(CurOffset, SizeTmp, Result)); } else if (LangOpts.CPlusPlus && Char == '*') { Kind = tok::periodstar; CurOffset += SizeTmp; } else if (Char == '.' 
&& - getCharAndSize(CurOffset+SizeTmp, SizeTmp2) == '.') { + getCharAndSize(CurOffset + SizeTmp, SizeTmp2) == '.') { Kind = tok::ellipsis; - CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); + CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), SizeTmp2, + Result); } else { Kind = tok::period; } @@ -3972,14 +4014,15 @@ CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); Kind = tok::minusminus; } else if (Char == '>' && LangOpts.CPlusPlus && - getCharAndSize(CurOffset+SizeTmp, SizeTmp2) == '*') { // C++ ->* - CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); + getCharAndSize(CurOffset + SizeTmp, SizeTmp2) == + '*') { // C++ ->* + CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), SizeTmp2, + Result); Kind = tok::arrowstar; - } else if (Char == '>') { // -> + } else if (Char == '>') { // -> CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); Kind = tok::arrow; - } else if (Char == '=') { // -= + } else if (Char == '=') { // -= CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); Kind = tok::minusequal; } else { @@ -4013,7 +4056,7 @@ LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP); if (!TreatAsComment) if (!(PP && PP->isPreprocessedOutput())) - TreatAsComment = getCharAndSize(CurOffset+SizeTmp, SizeTmp2) != '*'; + TreatAsComment = getCharAndSize(CurOffset + SizeTmp, SizeTmp2) != '*'; if (TreatAsComment) { if (SkipLineComment(Result, ConsumeChar(CurOffset, SizeTmp, Result), @@ -4055,16 +4098,16 @@ } else if (LangOpts.Digraphs && Char == ':') { CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); Char = getCharAndSize(CurOffset, SizeTmp); - if (Char == '%' && getCharAndSize(CurOffset+SizeTmp, SizeTmp2) == ':') { + if (Char == '%' && getCharAndSize(CurOffset + SizeTmp, SizeTmp2) == ':') { Kind = tok::hashhash; // '%:%:' -> '##' CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); - } else if (Char == '@' && 
LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize + SizeTmp2, Result); + } else if (Char == '@' && LangOpts.MicrosoftExt) { // %:@ -> #@ -> Charize CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); if (!isLexingRawMode()) Diag(BufferOffset, diag::ext_charize_microsoft); Kind = tok::hashat; - } else { // '%:' -> '#' + } else { // '%:' -> '#' // We parsed a # character. If this occurs at the start of the line, // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. @@ -4083,35 +4126,35 @@ if (ParsingFilename) { return LexAngledStringLiteral(Result, CurOffset); } else if (Char == '<') { - char After = getCharAndSize(CurOffset+SizeTmp, SizeTmp2); + char After = getCharAndSize(CurOffset + SizeTmp, SizeTmp2); if (After == '=') { Kind = tok::lesslessequal; CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); - } else if (After == '<' && IsStartOfConflictMarker(CurOffset-1)) { + SizeTmp2, Result); + } else if (After == '<' && IsStartOfConflictMarker(CurOffset - 1)) { // If this is actually a '<<<<<<<' version control conflict marker, // recognize it as such and recover nicely. goto LexNextToken; - } else if (After == '<' && HandleEndOfConflictMarker(CurOffset-1)) { + } else if (After == '<' && HandleEndOfConflictMarker(CurOffset - 1)) { // If this is '<<<<' and we're in a Perforce-style conflict marker, // ignore it. 
goto LexNextToken; } else if (LangOpts.CUDA && After == '<') { Kind = tok::lesslessless; CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); + SizeTmp2, Result); } else { CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); Kind = tok::lessless; } } else if (Char == '=') { - char After = getCharAndSize(CurOffset+SizeTmp, SizeTmp2); + char After = getCharAndSize(CurOffset + SizeTmp, SizeTmp2); if (After == '>') { if (LangOpts.CPlusPlus20) { if (!isLexingRawMode()) - Diag(BufferOffset, diag::warn_cxx17_compat_spaceship); + Diag(BufferOffset, diag::warn_cxx17_compat_spaceship); CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); + SizeTmp2, Result); Kind = tok::spaceship; break; } @@ -4119,8 +4162,8 @@ // change in semantics if this turns up in C++ <=17 mode. if (LangOpts.CPlusPlus && !isLexingRawMode()) { Diag(BufferOffset, diag::warn_cxx20_compat_spaceship) - << FixItHint::CreateInsertion( - getSourceLocation(CurOffset + SizeTmp, SizeTmp2), " "); + << FixItHint::CreateInsertion( + getSourceLocation(CurOffset + SizeTmp, SizeTmp2), " "); } } CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); @@ -4138,7 +4181,7 @@ if (After != ':' && After != '>') { Kind = tok::less; if (!isLexingRawMode()) - Diag(BufferOffset, diag::warn_cxx98_compat_less_colon_colon); + Diag(BufferOffset, diag::warn_cxx98_compat_less_colon_colon); break; } } @@ -4161,22 +4204,22 @@ CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); Kind = tok::greaterequal; } else if (Char == '>') { - char After = getCharAndSize(CurOffset+SizeTmp, SizeTmp2); + char After = getCharAndSize(CurOffset + SizeTmp, SizeTmp2); if (After == '=') { CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); + SizeTmp2, Result); Kind = tok::greatergreaterequal; - } else if (After == '>' && IsStartOfConflictMarker(CurOffset-1)) { + } else if (After == '>' && IsStartOfConflictMarker(CurOffset - 1)) { // If this is actually a '>>>>' 
conflict marker, recognize it as such // and recover nicely. goto LexNextToken; - } else if (After == '>' && HandleEndOfConflictMarker(CurOffset-1)) { + } else if (After == '>' && HandleEndOfConflictMarker(CurOffset - 1)) { // If this is '>>>>>>>' and we're in a conflict marker, ignore it. goto LexNextToken; } else if (LangOpts.CUDA && After == '>') { Kind = tok::greatergreatergreater; CurOffset = ConsumeChar(ConsumeChar(CurOffset, SizeTmp, Result), - SizeTmp2, Result); + SizeTmp2, Result); } else { CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); Kind = tok::greatergreater; @@ -4204,7 +4247,8 @@ CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); } else if (Char == '|') { // If this is '|||||||' and we're in a conflict marker, ignore it. - if (BufferStart[CurOffset+1] == '|' && HandleEndOfConflictMarker(CurOffset-1)) + if (BufferStart[CurOffset + 1] == '|' && + HandleEndOfConflictMarker(CurOffset - 1)) goto LexNextToken; Kind = tok::pipepipe; CurOffset = ConsumeChar(CurOffset, SizeTmp, Result); @@ -4233,7 +4277,8 @@ Char = getCharAndSize(CurOffset, SizeTmp); if (Char == '=') { // If this is '====' and we're in a conflict marker, ignore it. - if (BufferStart[CurOffset+1] == '=' && HandleEndOfConflictMarker(CurOffset-1)) + if (BufferStart[CurOffset + 1] == '=' && + HandleEndOfConflictMarker(CurOffset - 1)) goto LexNextToken; Kind = tok::equalequal; @@ -4269,7 +4314,7 @@ case '@': // Objective C support. - if (BufferStart[CurOffset-1] == '@' && LangOpts.ObjC) + if (BufferStart[CurOffset - 1] == '@' && LangOpts.ObjC) Kind = tok::at; else Kind = tok::unknown; @@ -4281,7 +4326,7 @@ if (uint32_t CodePoint = tryReadUCN(CurOffset, BufferOffset, &Result)) { if (CheckUnicodeWhitespace(Result, CodePoint, CurOffset)) { if (SkipWhitespace(Result, CurOffset, TokAtPhysicalStartOfLine)) - return true; // KeepWhitespaceMode + return true; // KeepWhitespaceMode // We only saw whitespace, so just try again with this lexer. 
// (We manually eliminate the tail call to avoid recursion.) @@ -4306,13 +4351,12 @@ // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. --CurOffset; - const char* CurPtr = BufferStart + CurOffset; - llvm::ConversionResult Status = - llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, - (const llvm::UTF8 *)(BufferStart + BufferSize), - &CodePoint, - llvm::strictConversion); - CurOffset = CurPtr-BufferStart; + const char *CurPtr = BufferStart + CurOffset; + llvm::ConversionResult Status = llvm::convertUTF8Sequence( + (const llvm::UTF8 **)&CurPtr, + (const llvm::UTF8 *)(BufferStart + BufferSize), &CodePoint, + llvm::strictConversion); + CurOffset = CurPtr - BufferStart; if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurOffset)) { if (SkipWhitespace(Result, CurOffset, TokAtPhysicalStartOfLine)) @@ -4337,7 +4381,7 @@ // just diagnose the invalid UTF-8, then drop the character. Diag(CurOffset, diag::err_invalid_utf8); - BufferOffset = CurOffset+1; + BufferOffset = CurOffset + 1; // We're pretending the character didn't exist, so just try again with // this lexer. // (We manually eliminate the tail call to avoid recursion.) @@ -4376,7 +4420,7 @@ const dependency_directives_scan::Token &DDTok, Token &Result) { const char *TokPtr = BufferStart + DDTok.Offset; Result.startToken(); - Result.setLocation(getSourceLocation(TokPtr-BufferStart)); + Result.setLocation(getSourceLocation(TokPtr - BufferStart)); Result.setKind(DDTok.Kind); Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length);