Diff 120195

lib/Lex/Lexer.cpp

Show First 20 Lines • Show All 204 Lines • ▼ Show 20 Lines	Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
L->ParsingPreprocessorDirective = true;		L->ParsingPreprocessorDirective = true;

// This lexer really is for _Pragma.		// This lexer really is for _Pragma.
L->Is_PragmaLexer = true;		L->Is_PragmaLexer = true;
return L;		return L;
}		}

/// Stringify - Convert the specified string into a C string, with surrounding		/// Stringify - Convert the specified string into a C string, with surrounding
/// ""'s, and with escaped \ and " characters.		/// ""'s, and with escaped \ and " characters. The function replaces each
		/// newline character with the "\n" escape code as well.
std::string Lexer::Stringify(StringRef Str, bool Charify) {		std::string Lexer::Stringify(StringRef Str, bool Charify) {
		jkorous-appleUnsubmitted Not Done Reply Inline Actions I am not sure I understand this correctly but wouldn't it be more precise if these literals are escaped? ... escaping '\' ... -> ...escaping '\\' ... ... with "\n" ... -> ... with "\\n" Alternatively we could use R"(\)" and R"(\n)". jkorous-apple: I am not sure I understand this correctly but wouldn't it be more precise if these literals are…
std::string Result = Str;		std::string Result = Str;
char Quote = Charify ? '\'' : '"';		char Quote = Charify ? '\'' : '"';
for (unsigned i = 0, e = Result.size(); i != e; ++i) {		for (unsigned i = 0, e = Result.size(); i < e; ++i) {
		jkorous-appleUnsubmitted Not Done Reply Inline Actions Wouldn't auto or typename T::size_type instead of unsigned be more appropriate here? Both of your supported use cases have this member type. http://llvm.org/doxygen/classllvm_1_1StringRef.html#a54e59e2d53e5ee736ee060be7c457508 http://llvm.org/doxygen/classllvm_1_1SmallVectorImpl.html#acc72e8846802a1e703501219cf19458e jkorous-apple: Wouldn't auto or typename T::size_type instead of unsigned be more appropriate here?
if (Result[i] == '\\' || Result[i] == Quote) {		if (Result[i] == '\\' || Result[i] == Quote) {
Result.insert(Result.begin()+i, '\\');		Result.insert(Result.begin() + i, '\\');
++i; ++e;		++i;
		++e;
		} else if (auto Size = getEscapedNewLineSize(Result.substr(i).data())) {
		vsapsaiUnsubmitted Not Done Reply Inline Actions `getEscapedNewLineSize` mentions P[-1] is known to be a "\" or a trigraph equivalent on entry to this function. Is this precondition correct in this case? And `std::string::substr` creates a copy of a substring. It is inefficient in the loop and looks like you don't really need `std::string` here anyway. vsapsai: `getEscapedNewLineSize` mentions > P[-1] is known to be a "\" or a trigraph equivalent on entry…
		Result.erase(Result.begin() + i, Result.begin() + i + Size);
		Result.insert(Result.begin() + i, '\\');
		Result.insert(Result.begin() + i + 1, 'n');
		i += 2;
		e += (2 - Size);
}		}
}		}
return Result;		return Result;
}		}
		jkorous-appleUnsubmitted Not Done Reply Inline Actions I am just wondering if potential performance benefit of counting all the extra space in advance and resizing the string just once might be interesting here. Basically with current approach characters at the end of the string are moved as many times as there are endlines in the string. jkorous-apple: I am just wondering if potential performance benefit of counting all the extra space in advance…

/// Stringify - Convert the specified string into a C string by escaping '\'		/// Stringify - Convert the specified string into a C string by escaping '\'
/// and " characters. This does not add surrounding ""'s to the string.		/// and " characters. The function replaces each newline character with the
		/// "\n" escape code as well. This does not add surrounding ""'s to the string.
void Lexer::Stringify(SmallVectorImpl<char> &Str) {		void Lexer::Stringify(SmallVectorImpl<char> &Str) {
for (unsigned i = 0, e = Str.size(); i != e; ++i) {		for (unsigned i = 0, e = Str.size(); i < e; ++i) {
if (Str[i] == '\\' || Str[i] == '"') {		if (Str[i] == '\\' || Str[i] == '"') {
Str.insert(Str.begin()+i, '\\');		Str.insert(Str.begin() + i, '\\');
++i; ++e;		++i;
		++e;
		} else if (Str[i] == '\n' || Str[i] == '\r') {
		unsigned Size = 1;
		if ((i < e - 1) && (Str[i + 1] == '\n' || Str[i + 1] == '\r') &&
		Str[i] != Str[i + 1])
		Size += 1;

		Str.erase(Str.begin() + i, Str.begin() + i + Size);
		Str.insert(Str.begin() + i, '\\');
		Str.insert(Str.begin() + i + 1, 'n');
		i += 2;
		e += (2 - Size);
}		}
}		}
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Token Spelling		// Token Spelling
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

▲ Show 20 Lines • Show All 121 Lines • ▼ Show 20 Lines
/// to allocate enough space for the token, which is guaranteed to be at least		/// to allocate enough space for the token, which is guaranteed to be at least
/// Tok.getLength() bytes long. The actual length of the token is returned.		/// Tok.getLength() bytes long. The actual length of the token is returned.
///		///
/// Note that this method may do two possible things: it may either fill in		/// Note that this method may do two possible things: it may either fill in
/// the buffer specified with characters, or it may change the input pointer		/// the buffer specified with characters, or it may change the input pointer
/// to point to a constant buffer with the data already in it (avoiding a		/// to point to a constant buffer with the data already in it (avoiding a
/// copy). The caller is not allowed to modify the returned buffer pointer		/// copy). The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.		/// if an internal buffer is returned.
unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,		unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
const SourceManager &SourceMgr,		const SourceManager &SourceMgr,
const LangOptions &LangOpts, bool *Invalid) {		const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");		assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");

const char *TokStart = nullptr;		const char *TokStart = nullptr;
// NOTE: this has to be checked before testing for an IdentifierInfo.		// NOTE: this has to be checked before testing for an IdentifierInfo.
if (Tok.is(tok::raw_identifier))		if (Tok.is(tok::raw_identifier))
TokStart = Tok.getRawIdentifier().data();		TokStart = Tok.getRawIdentifier().data();
▲ Show 20 Lines • Show All 208 Lines • ▼ Show 20 Lines	PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
do {		do {
TheLexer.LexFromRawLexer(TheTok);		TheLexer.LexFromRawLexer(TheTok);

if (InPreprocessorDirective) {		if (InPreprocessorDirective) {
// If we've hit the end of the file, we're done.		// If we've hit the end of the file, we're done.
if (TheTok.getKind() == tok::eof) {		if (TheTok.getKind() == tok::eof) {
break;		break;
}		}

// If we haven't hit the end of the preprocessor directive, skip this		// If we haven't hit the end of the preprocessor directive, skip this
// token.		// token.
if (!TheTok.isAtStartOfLine())		if (!TheTok.isAtStartOfLine())
continue;		continue;

// We've passed the end of the preprocessor directive, and will look		// We've passed the end of the preprocessor directive, and will look
// at this token again below.		// at this token again below.
InPreprocessorDirective = false;		InPreprocessorDirective = false;
}		}

// Keep track of the # of lines in the preamble.		// Keep track of the # of lines in the preamble.
if (TheTok.isAtStartOfLine()) {		if (TheTok.isAtStartOfLine()) {
unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;		unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;

// If we were asked to limit the number of lines in the preamble,		// If we were asked to limit the number of lines in the preamble,
// and we're about to exceed that limit, we're done.		// and we're about to exceed that limit, we're done.
if (MaxLineOffset && TokOffset >= MaxLineOffset)		if (MaxLineOffset && TokOffset >= MaxLineOffset)
break;		break;
}		}

// Comments are okay; skip over them.		// Comments are okay; skip over them.
if (TheTok.getKind() == tok::comment) {		if (TheTok.getKind() == tok::comment) {
if (ActiveCommentLoc.isInvalid())		if (ActiveCommentLoc.isInvalid())
ActiveCommentLoc = TheTok.getLocation();		ActiveCommentLoc = TheTok.getLocation();
continue;		continue;
}		}

if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {		if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
// This is the start of a preprocessor directive.		// This is the start of a preprocessor directive.
Token HashTok = TheTok;		Token HashTok = TheTok;
InPreprocessorDirective = true;		InPreprocessorDirective = true;
ActiveCommentLoc = SourceLocation();		ActiveCommentLoc = SourceLocation();

// Figure out which directive this is. Since we're lexing raw tokens,		// Figure out which directive this is. Since we're lexing raw tokens,
// we don't have an identifier table available. Instead, just look at		// we don't have an identifier table available. Instead, just look at
// the raw identifier to recognize and categorize preprocessor directives.		// the raw identifier to recognize and categorize preprocessor directives.
TheLexer.LexFromRawLexer(TheTok);		TheLexer.LexFromRawLexer(TheTok);
if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {		if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
StringRef Keyword = TheTok.getRawIdentifier();		StringRef Keyword = TheTok.getRawIdentifier();
PreambleDirectiveKind PDK		PreambleDirectiveKind PDK
= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)		= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
Show All 23 Lines	if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
case PDK_Skipped:		case PDK_Skipped:
continue;		continue;

case PDK_Unknown:		case PDK_Unknown:
// We don't know what this directive is; stop at the '#'.		// We don't know what this directive is; stop at the '#'.
break;		break;
}		}
}		}

// We only end up here if we didn't recognize the preprocessor		// We only end up here if we didn't recognize the preprocessor
// directive or it was one that can't occur in the preamble at this		// directive or it was one that can't occur in the preamble at this
// point. Roll back the current token to the location of the '#'.		// point. Roll back the current token to the location of the '#'.
InPreprocessorDirective = false;		InPreprocessorDirective = false;
TheTok = HashTok;		TheTok = HashTok;
}		}

// We hit a token that we don't recognize as being in the		// We hit a token that we don't recognize as being in the
// "preprocessing only" part of the file, so we're no longer in		// "preprocessing only" part of the file, so we're no longer in
// the preamble.		// the preamble.
break;		break;
} while (true);		} while (true);

SourceLocation End;		SourceLocation End;
if (ActiveCommentLoc.isValid())		if (ActiveCommentLoc.isValid())
End = ActiveCommentLoc; // don't truncate a decl comment.		End = ActiveCommentLoc; // don't truncate a decl comment.
else		else
End = TheTok.getLocation();		End = TheTok.getLocation();

return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),		return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
TheTok.isAtStartOfLine());		TheTok.isAtStartOfLine());
}		}

/// AdvanceToTokenCharacter - Given a location that specifies the start of a		/// AdvanceToTokenCharacter - Given a location that specifies the start of a
/// token, return a new location that specifies a character within the token.		/// token, return a new location that specifies a character within the token.
SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,		SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
unsigned CharNo,		unsigned CharNo,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts) {		const LangOptions &LangOpts) {
// Figure out how many physical characters away the specified expansion		// Figure out how many physical characters away the specified expansion
// character is. This needs to take into consideration newlines and		// character is. This needs to take into consideration newlines and
// trigraphs.		// trigraphs.
bool Invalid = false;		bool Invalid = false;
const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);		const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);

// If they request the first char of the token, we're trivially done.		// If they request the first char of the token, we're trivially done.
if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))		if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
return TokStart;		return TokStart;

unsigned PhysOffset = 0;		unsigned PhysOffset = 0;

// The usual case is that tokens don't contain anything interesting. Skip		// The usual case is that tokens don't contain anything interesting. Skip
// over the uninteresting characters. If a token only consists of simple		// over the uninteresting characters. If a token only consists of simple
// chars, this method is extremely fast.		// chars, this method is extremely fast.
while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {		while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
if (CharNo == 0)		if (CharNo == 0)
return TokStart.getLocWithOffset(PhysOffset);		return TokStart.getLocWithOffset(PhysOffset);
++TokPtr;		++TokPtr;
--CharNo;		--CharNo;
++PhysOffset;		++PhysOffset;
}		}

// If we have a character that may be a trigraph or escaped newline, use a		// If we have a character that may be a trigraph or escaped newline, use a
// lexer to parse it correctly.		// lexer to parse it correctly.
for (; CharNo; --CharNo) {		for (; CharNo; --CharNo) {
unsigned Size;		unsigned Size;
Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);		Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
TokPtr += Size;		TokPtr += Size;
PhysOffset += Size;		PhysOffset += Size;
}		}

// Final detail: if we end up on an escaped newline, we want to return the		// Final detail: if we end up on an escaped newline, we want to return the
// location of the actual byte of the token. For example foo\<newline>bar		// location of the actual byte of the token. For example foo\<newline>bar
// advanced by 3 should return the location of b, not of \\. One compounding		// advanced by 3 should return the location of b, not of \\. One compounding
// detail of this is that the escape may be made by a trigraph.		// detail of this is that the escape may be made by a trigraph.
if (!Lexer::isObviouslySimpleCharacter(*TokPtr))		if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;		PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;

return TokStart.getLocWithOffset(PhysOffset);		return TokStart.getLocWithOffset(PhysOffset);
}		}

/// \brief Computes the source location just past the end of the		/// \brief Computes the source location just past the end of the
/// token at this source location.		/// token at this source location.
///		///
/// This routine can be used to produce a source location that		/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and		/// points just past the end of the token referenced by \p Loc, and
Show All 18 Lines	if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
return SourceLocation(); // Points inside the macro expansion.		return SourceLocation(); // Points inside the macro expansion.
}		}

unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);		unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
if (Len > Offset)		if (Len > Offset)
Len = Len - Offset;		Len = Len - Offset;
else		else
return Loc;		return Loc;

return Loc.getLocWithOffset(Len);		return Loc.getLocWithOffset(Len);
}		}

/// \brief Returns true if the given MacroID location points at the first		/// \brief Returns true if the given MacroID location points at the first
/// token of the macro expansion.		/// token of the macro expansion.
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,		bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts,		const LangOptions &LangOpts,
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines	while (true) {
const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);		const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();		const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
Loc = Expansion.getExpansionLocStart();		Loc = Expansion.getExpansionLocStart();
if (!Expansion.isMacroArgExpansion())		if (!Expansion.isMacroArgExpansion())
break;		break;

// For macro arguments we need to check that the argument did not come		// For macro arguments we need to check that the argument did not come
// from an inner macro, e.g: "MAC1( MAC2(foo) )"		// from an inner macro, e.g: "MAC1( MAC2(foo) )"

// Loc points to the argument id of the macro definition, move to the		// Loc points to the argument id of the macro definition, move to the
// macro expansion.		// macro expansion.
Loc = SM.getImmediateExpansionRange(Loc).first;		Loc = SM.getImmediateExpansionRange(Loc).first;
SourceLocation SpellLoc = Expansion.getSpellingLoc();		SourceLocation SpellLoc = Expansion.getSpellingLoc();
if (SpellLoc.isFileID())		if (SpellLoc.isFileID())
break; // No inner macro.		break; // No inner macro.

// If spelling location resides in the same FileID as macro expansion		// If spelling location resides in the same FileID as macro expansion
▲ Show 20 Lines • Show All 809 Lines • ▼ Show 20 Lines	Diag(BufferPtr, getLangOpts().CPlusPlus
: diag::warn_c99_compat_unicode_literal);		: diag::warn_c99_compat_unicode_literal);

char C = getAndAdvanceChar(CurPtr, Result);		char C = getAndAdvanceChar(CurPtr, Result);
while (C != '"') {		while (C != '"') {
// Skip escaped characters. Escaped newlines will already be processed by		// Skip escaped characters. Escaped newlines will already be processed by
// getAndAdvanceChar.		// getAndAdvanceChar.
if (C == '\\')		if (C == '\\')
C = getAndAdvanceChar(CurPtr, Result);		C = getAndAdvanceChar(CurPtr, Result);

if (C == '\n' || C == '\r' || // Newline.		if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.		(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)		if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;		Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
FormTokenWithChars(Result, CurPtr-1, tok::unknown);		FormTokenWithChars(Result, CurPtr-1, tok::unknown);
return true;		return true;
}		}

if (C == 0) {		if (C == 0) {
if (isCodeCompletionPoint(CurPtr-1)) {		if (isCodeCompletionPoint(CurPtr-1)) {
PP->CodeCompleteNaturalLanguage();		PP->CodeCompleteNaturalLanguage();
FormTokenWithChars(Result, CurPtr-1, tok::unknown);		FormTokenWithChars(Result, CurPtr-1, tok::unknown);
cutOffLexing();		cutOffLexing();
return true;		return true;
}		}

▲ Show 20 Lines • Show All 412 Lines • ▼ Show 20 Lines	if (!ParsingPreprocessorDirective || LexingRawMode)
return true;		return true;

// If this Line-style comment is in a macro definition, transmogrify it into		// If this Line-style comment is in a macro definition, transmogrify it into
// a C-style block comment.		// a C-style block comment.
bool Invalid = false;		bool Invalid = false;
std::string Spelling = PP->getSpelling(Result, &Invalid);		std::string Spelling = PP->getSpelling(Result, &Invalid);
if (Invalid)		if (Invalid)
return true;		return true;

assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");		assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
Spelling[1] = '*'; // Change prefix to "/*".		Spelling[1] = '*'; // Change prefix to "/*".
Spelling += "*/"; // add suffix.		Spelling += "*/"; // add suffix.

Result.setKind(tok::comment);		Result.setKind(tok::comment);
PP->CreateString(Spelling, Result,		PP->CreateString(Spelling, Result,
Result.getLocation(), Result.getLocation());		Result.getLocation(), Result.getLocation());
return true;		return true;
▲ Show 20 Lines • Show All 309 Lines • ▼ Show 20 Lines	if (ParsingPreprocessorDirective) {
// Update the location of token as well as BufferPtr.		// Update the location of token as well as BufferPtr.
FormTokenWithChars(Result, CurPtr, tok::eod);		FormTokenWithChars(Result, CurPtr, tok::eod);

// Restore comment saving mode, in case it was disabled for directive.		// Restore comment saving mode, in case it was disabled for directive.
if (PP)		if (PP)
resetExtendedTokenMode();		resetExtendedTokenMode();
return true; // Have a token.		return true; // Have a token.
}		}

// If we are in raw mode, return this event as an EOF token. Let the caller		// If we are in raw mode, return this event as an EOF token. Let the caller
// that put us in raw mode handle the event.		// that put us in raw mode handle the event.
if (isLexingRawMode()) {		if (isLexingRawMode()) {
Result.startToken();		Result.startToken();
BufferPtr = BufferEnd;		BufferPtr = BufferEnd;
FormTokenWithChars(Result, BufferEnd, tok::eof);		FormTokenWithChars(Result, BufferEnd, tok::eof);
return true;		return true;
}		}

if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {		if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
PP->setRecordedPreambleConditionalStack(ConditionalStack);		PP->setRecordedPreambleConditionalStack(ConditionalStack);
ConditionalStack.clear();		ConditionalStack.clear();
}		}

// Issue diagnostics for unterminated #if and missing newline.		// Issue diagnostics for unterminated #if and missing newline.

// If we are in a #if directive, emit an error.		// If we are in a #if directive, emit an error.
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines
/// control conflict marker like '<<<<<<<', recognize it as such, emit an error		/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
/// and recover nicely. This returns true if it is a conflict marker and false		/// and recover nicely. This returns true if it is a conflict marker and false
/// if not.		/// if not.
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {		bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.		// Only a conflict marker if it starts at the beginning of a line.
if (CurPtr != BufferStart &&		if (CurPtr != BufferStart &&
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')		CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;		return false;

// Check to see if we have <<<<<<< or >>>>.		// Check to see if we have <<<<<<< or >>>>.
if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&		if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&
!StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))		!StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))
return false;		return false;

// If we have a situation where we don't care about conflict markers, ignore		// If we have a situation where we don't care about conflict markers, ignore
// it.		// it.
if (CurrentConflictMarkerState || isLexingRawMode())		if (CurrentConflictMarkerState || isLexingRawMode())
return false;		return false;

ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;		ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;

// Check to see if there is an ending marker somewhere in the buffer at the		// Check to see if there is an ending marker somewhere in the buffer at the
// start of a line to terminate this conflict marker.		// start of a line to terminate this conflict marker.
if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {		if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {
// We found a match. We are really in a conflict marker.		// We found a match. We are really in a conflict marker.
// Diagnose this, and ignore to the end of line.		// Diagnose this, and ignore to the end of line.
Diag(CurPtr, diag::err_conflict_marker);		Diag(CurPtr, diag::err_conflict_marker);
CurrentConflictMarkerState = Kind;		CurrentConflictMarkerState = Kind;

// Skip ahead to the end of line. We know this exists because the		// Skip ahead to the end of line. We know this exists because the
// end-of-conflict marker starts with \r or \n.		// end-of-conflict marker starts with \r or \n.
while (*CurPtr != '\r' && *CurPtr != '\n') {		while (*CurPtr != '\r' && *CurPtr != '\n') {
assert(CurPtr != BufferEnd && "Didn't find end of line");		assert(CurPtr != BufferEnd && "Didn't find end of line");
++CurPtr;		++CurPtr;
}		}
BufferPtr = CurPtr;		BufferPtr = CurPtr;
return true;		return true;
}		}

// No end of conflict marker found.		// No end of conflict marker found.
return false;		return false;
}		}

/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if		/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if
/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it		/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it
/// is the end of a conflict marker. Handle it by ignoring up until the end of		/// is the end of a conflict marker. Handle it by ignoring up until the end of
/// the line. This returns true if it is a conflict marker and false if not.		/// the line. This returns true if it is a conflict marker and false if not.
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {		bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.		// Only a conflict marker if it starts at the beginning of a line.
if (CurPtr != BufferStart &&		if (CurPtr != BufferStart &&
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')		CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;		return false;

// If we have a situation where we don't care about conflict markers, ignore		// If we have a situation where we don't care about conflict markers, ignore
// it.		// it.
if (!CurrentConflictMarkerState || isLexingRawMode())		if (!CurrentConflictMarkerState || isLexingRawMode())
return false;		return false;

// Check to see if we have the marker (4 characters in a row).		// Check to see if we have the marker (4 characters in a row).
for (unsigned i = 1; i != 4; ++i)		for (unsigned i = 1; i != 4; ++i)
if (CurPtr[i] != CurPtr[0])		if (CurPtr[i] != CurPtr[0])
return false;		return false;

// If we do have it, search for the end of the conflict marker. This could		// If we do have it, search for the end of the conflict marker. This could
// fail if it got skipped with a '#if 0' or something. Note that CurPtr might		// fail if it got skipped with a '#if 0' or something. Note that CurPtr might
// be the end of conflict marker.		// be the end of conflict marker.
if (const char *End = FindConflictEnd(CurPtr, BufferEnd,		if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
CurrentConflictMarkerState)) {		CurrentConflictMarkerState)) {
CurPtr = End;		CurPtr = End;

// Skip ahead to the end of line.		// Skip ahead to the end of line.
while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')		while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
++CurPtr;		++CurPtr;

BufferPtr = CurPtr;		BufferPtr = CurPtr;

// No longer in the conflict marker.		// No longer in the conflict marker.
CurrentConflictMarkerState = CMK_None;		CurrentConflictMarkerState = CMK_None;
return true;		return true;
}		}

return false;		return false;
}		}

static const char *findPlaceholderEnd(const char *CurPtr,		static const char *findPlaceholderEnd(const char *CurPtr,
const char *BufferEnd) {		const char *BufferEnd) {
if (CurPtr == BufferEnd)		if (CurPtr == BufferEnd)
return nullptr;		return nullptr;
BufferEnd -= 1; // Scan until the second last character.		BufferEnd -= 1; // Scan until the second last character.
▲ Show 20 Lines • Show All 292 Lines • ▼ Show 20 Lines	if (!isLexingRawMode())
Diag(CurPtr-1, diag::null_in_file);		Diag(CurPtr-1, diag::null_in_file);
Result.setFlag(Token::LeadingSpace);		Result.setFlag(Token::LeadingSpace);
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))		if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
return true; // KeepWhitespaceMode		return true; // KeepWhitespaceMode

// We know the lexer hasn't changed, so just try again with this lexer.		// We know the lexer hasn't changed, so just try again with this lexer.
// (We manually eliminate the tail call to avoid recursion.)		// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;		goto LexNextToken;

case 26: // DOS & CP/M EOF: "^Z".		case 26: // DOS & CP/M EOF: "^Z".
// If we're in Microsoft extensions mode, treat this as end of file.		// If we're in Microsoft extensions mode, treat this as end of file.
if (LangOpts.MicrosoftExt) {		if (LangOpts.MicrosoftExt) {
if (!isLexingRawMode())		if (!isLexingRawMode())
Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);		Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
return LexEndOfFile(Result, CurPtr-1);		return LexEndOfFile(Result, CurPtr-1);
}		}

// If Microsoft extensions are disabled, this is just random garbage.		// If Microsoft extensions are disabled, this is just random garbage.
Kind = tok::unknown;		Kind = tok::unknown;
break;		break;

case '\r':		case '\r':
if (CurPtr[0] == '\n')		if (CurPtr[0] == '\n')
Char = getAndAdvanceChar(CurPtr, Result);		Char = getAndAdvanceChar(CurPtr, Result);
LLVM_FALLTHROUGH;		LLVM_FALLTHROUGH;
case '\n':		case '\n':
// If we are inside a preprocessor directive and we see the end of line,		// If we are inside a preprocessor directive and we see the end of line,
// we know we are done with the directive, so return an EOD token.		// we know we are done with the directive, so return an EOD token.
if (ParsingPreprocessorDirective) {		if (ParsingPreprocessorDirective) {
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
return true; // There is a token to return.		return true; // There is a token to return.
goto SkipIgnoredUnits;		goto SkipIgnoredUnits;
} else if (isHorizontalWhitespace(*CurPtr)) {		} else if (isHorizontalWhitespace(*CurPtr)) {
goto SkipHorizontalWhitespace;		goto SkipHorizontalWhitespace;
}		}
// We only saw whitespace, so just try again with this lexer.		// We only saw whitespace, so just try again with this lexer.
// (We manually eliminate the tail call to avoid recursion.)		// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;		goto LexNextToken;

// C99 6.4.4.1: Integer Constants.		// C99 6.4.4.1: Integer Constants.
// C99 6.4.4.2: Floating Constants.		// C99 6.4.4.2: Floating Constants.
case '0': case '1': case '2': case '3': case '4':		case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':		case '5': case '6': case '7': case '8': case '9':
// Notify MIOpt that we read a non-whitespace/non-comment token.		// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();		MIOpt.ReadToken();
return LexNumericConstant(Result, CurPtr);		return LexNumericConstant(Result, CurPtr);

▲ Show 20 Lines • Show All 482 Lines • ▼ Show 20 Lines	case ';':
Kind = tok::semi;		Kind = tok::semi;
break;		break;
case '=':		case '=':
Char = getCharAndSize(CurPtr, SizeTmp);		Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '=') {		if (Char == '=') {
// If this is '====' and we're in a conflict marker, ignore it.		// If this is '====' and we're in a conflict marker, ignore it.
if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))		if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
goto LexNextToken;		goto LexNextToken;

Kind = tok::equalequal;		Kind = tok::equalequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);		CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {		} else {
Kind = tok::equal;		Kind = tok::equal;
}		}
break;		break;
case ',':		case ',':
Kind = tok::comma;		Kind = tok::comma;
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	if (Status == llvm::conversionOK) {
return true; // KeepWhitespaceMode		return true; // KeepWhitespaceMode

// We only saw whitespace, so just try again with this lexer.		// We only saw whitespace, so just try again with this lexer.
// (We manually eliminate the tail call to avoid recursion.)		// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;		goto LexNextToken;
}		}
return LexUnicode(Result, CodePoint, CurPtr);		return LexUnicode(Result, CodePoint, CurPtr);
}		}

if (isLexingRawMode() \|\| ParsingPreprocessorDirective \|\|		if (isLexingRawMode() \|\| ParsingPreprocessorDirective \|\|
PP->isPreprocessedOutput()) {		PP->isPreprocessedOutput()) {
++CurPtr;		++CurPtr;
Kind = tok::unknown;		Kind = tok::unknown;
break;		break;
}		}

// Non-ASCII characters tend to creep into source code unintentionally.		// Non-ASCII characters tend to creep into source code unintentionally.
Show All 34 Lines

test/Preprocessor/macro_raw_string.cpp

This file was added.

				// RUN: %clang_cc1 -E -std=c++11 %s -o %t
				// RUN: %clang_cc1 %t

				// The first RUN line preprocesses this file into %t; the second RUN line
				// feeds that preprocessed output back to the compiler. FOO applies the
				// stringizing operator (#) to its argument.
				#define FOO(str) foo(#str)

				extern void foo(const char *str);

				void bar() {
				// The macro argument is a raw string literal that spans two source lines.
				FOO(R"(foo
				bar)");
				}

unittests/Lex/LexerTest.cpp

Show All 31 Lines
// The test fixture.		// The test fixture.
class LexerTest : public ::testing::Test {		class LexerTest : public ::testing::Test {
protected:		protected:
LexerTest()		LexerTest()
: FileMgr(FileMgrOpts),		: FileMgr(FileMgrOpts),
DiagID(new DiagnosticIDs()),		DiagID(new DiagnosticIDs()),
Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),		Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
SourceMgr(Diags, FileMgr),		SourceMgr(Diags, FileMgr),
TargetOpts(new TargetOptions)		TargetOpts(new TargetOptions)
{		{
TargetOpts->Triple = "x86_64-apple-darwin11.1.0";		TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);		Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
}		}

std::unique_ptr<Preprocessor> CreatePP(StringRef Source,		std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
TrivialModuleLoader &ModLoader) {		TrivialModuleLoader &ModLoader) {
std::unique_ptr<llvm::MemoryBuffer> Buf =		std::unique_ptr<llvm::MemoryBuffer> Buf =
▲ Show 20 Lines • Show All 424 Lines • ▼ Show 20 Lines	TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
}		}
}		}

// Lexes a buffer consisting of a line comment that ends in a backslash-newline
// and expects that no tokens are produced. Per the test name, this presumably
// guards against reading past the end of the buffer.
TEST_F(LexerTest, AvoidPastEndOfStringDereference) {		TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
std::vector<Token> LexedTokens = Lex(" // \\\n");		std::vector<Token> LexedTokens = Lex(" // \\\n");
EXPECT_TRUE(LexedTokens.empty());		EXPECT_TRUE(LexedTokens.empty());
}		}

		// Exercises both call forms of Lexer::Stringify on a multi-line raw string
		// that also contains double quotes: the form that returns a std::string and
		// the form that takes a SmallString and whose result is read back from the
		// argument itself.
		// NOTE(review): "Ras" in the test name looks like a typo for "Raw" -- confirm
		// before renaming, since the name is what test filters match on.
		TEST_F(LexerTest, StringizingRasString) {
		std::string String1 = R"(foo
		{"bar":[]}
		baz)";
		// Same contents as String1, held in a SmallString for the second call form.
		SmallString<128> String2;
		String2 += String1.c_str();

		// String1 receives the returned value; String2 is passed directly and the
		// return value (if any) is ignored.
		String1 = Lexer::Stringify(StringRef(String1));
		Lexer::Stringify(String2);

		// Both forms are expected to yield the same text, with each newline written
		// as the two-character sequence \n and each double quote preceded by a
		// backslash.
		EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
		EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
		}

} // anonymous namespace		} // anonymous namespace

This is an archive of the discontinued LLVM Phabricator instance.

Stringizing raw string literals containing newline
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 120195

lib/Lex/Lexer.cpp

test/Preprocessor/macro_raw_string.cpp

unittests/Lex/LexerTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

Stringizing raw string literals containing newline
Closed · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 120195

lib/Lex/Lexer.cpp

test/Preprocessor/macro_raw_string.cpp

unittests/Lex/LexerTest.cpp

Stringizing raw string literals containing newline
ClosedPublic