Diff 123658

include/clang/Lex/Lexer.h

Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines	class Lexer : public PreprocessorLexer {

//===--------------------------------------------------------------------===//		//===--------------------------------------------------------------------===//
// Constant configuration values for this lexer.		// Constant configuration values for this lexer.
const char *BufferStart; // Start of the buffer.		const char *BufferStart; // Start of the buffer.
const char *BufferEnd; // End of the buffer.		const char *BufferEnd; // End of the buffer.
SourceLocation FileLoc; // Location for start of file.		SourceLocation FileLoc; // Location for start of file.
LangOptions LangOpts; // LangOpts enabled by this language (cache).		LangOptions LangOpts; // LangOpts enabled by this language (cache).
bool Is_PragmaLexer; // True if lexer for _Pragma handling.		bool Is_PragmaLexer; // True if lexer for _Pragma handling.

//===--------------------------------------------------------------------===//		//===--------------------------------------------------------------------===//
// Context-specific lexing flags set by the preprocessor.		// Context-specific lexing flags set by the preprocessor.
//		//

/// ExtendedTokenMode - The lexer can optionally keep comments and whitespace		/// ExtendedTokenMode - The lexer can optionally keep comments and whitespace
/// and return them as tokens. This is used for -C and -CC modes, and		/// and return them as tokens. This is used for -C and -CC modes, and
/// whitespace preservation can be useful for some clients that want to lex		/// whitespace preservation can be useful for some clients that want to lex
/// the file in raw mode and get every character from the file.		/// the file in raw mode and get every character from the file.
▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines	public:
/// getSourceLocation - Return a source location for the next character in		/// getSourceLocation - Return a source location for the next character in
/// the current file.		/// the current file.
SourceLocation getSourceLocation() override {		SourceLocation getSourceLocation() override {
return getSourceLocation(BufferPtr);		return getSourceLocation(BufferPtr);
}		}

/// \brief Return the current location in the buffer.		/// \brief Return the current location in the buffer.
const char *getBufferLocation() const { return BufferPtr; }		const char *getBufferLocation() const { return BufferPtr; }

/// Stringify - Convert the specified string into a C string by escaping '\'		/// Stringify - Convert the specified string into a C string. This does not
/// and " characters. This does not add surrounding ""'s to the string.		/// add surrounding ""'s to the string. If Charify is true, this escapes the
/// If Charify is true, this escapes the ' character instead of ".		/// ' character instead of ".
		jkorous-apple (inline comment, unsubmitted, not done): Shouldn't we put all the details from the implementation annotation here as well, since this is the public interface that people will actually use?
static std::string Stringify(StringRef Str, bool Charify = false);		static std::string Stringify(StringRef Str, bool Charify = false);

/// Stringify - Convert the specified string into a C string by escaping '\'		/// Stringify - Convert the specified string into a C string. This does not
/// and " characters. This does not add surrounding ""'s to the string.		/// add surrounding ""'s to the string.
		jkorous-apple (inline comment, unsubmitted, not done): Shouldn't we put all the details from the implementation annotation here as well, since this is the public interface that people will actually use?
static void Stringify(SmallVectorImpl<char> &Str);		static void Stringify(SmallVectorImpl<char> &Str);


/// getSpelling - This method is used to get the spelling of a token into a		/// getSpelling - This method is used to get the spelling of a token into a
/// preallocated buffer, instead of as an std::string. The caller is required		/// preallocated buffer, instead of as an std::string. The caller is required
/// to allocate enough space for the token, which is guaranteed to be at least		/// to allocate enough space for the token, which is guaranteed to be at least
/// Tok.getLength() bytes long. The length of the actual result is returned.		/// Tok.getLength() bytes long. The length of the actual result is returned.
///		///
/// Note that this method may do two possible things: it may either fill in		/// Note that this method may do two possible things: it may either fill in
/// the buffer specified with characters, or it may change the input pointer		/// the buffer specified with characters, or it may change the input pointer
/// to point to a constant buffer with the data already in it (avoiding a		/// to point to a constant buffer with the data already in it (avoiding a
/// copy). The caller is not allowed to modify the returned buffer pointer		/// copy). The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.		/// if an internal buffer is returned.
static unsigned getSpelling(const Token &Tok, const char *&Buffer,		static unsigned getSpelling(const Token &Tok, const char *&Buffer,
const SourceManager &SourceMgr,		const SourceManager &SourceMgr,
const LangOptions &LangOpts,		const LangOptions &LangOpts,
bool *Invalid = nullptr);		bool *Invalid = nullptr);

/// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a		/// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a
/// token is the characters used to represent the token in the source file		/// token is the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding. In particular, this		/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs		/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.		/// UCNs, etc.
static std::string getSpelling(const Token &Tok,		static std::string getSpelling(const Token &Tok,
const SourceManager &SourceMgr,		const SourceManager &SourceMgr,
const LangOptions &LangOpts,		const LangOptions &LangOpts,
bool *Invalid = nullptr);		bool *Invalid = nullptr);

/// getSpelling - This method is used to get the spelling of the		/// getSpelling - This method is used to get the spelling of the
/// token at the given source location. If, as is usually true, it		/// token at the given source location. If, as is usually true, it
/// is not necessary to copy any data, then the returned string may		/// is not necessary to copy any data, then the returned string may
/// not point into the provided buffer.		/// not point into the provided buffer.
///		///
/// This method lexes at the expansion depth of the given		/// This method lexes at the expansion depth of the given
/// location and does not jump to the expansion or spelling		/// location and does not jump to the expansion or spelling
/// location.		/// location.
static StringRef getSpelling(SourceLocation loc,		static StringRef getSpelling(SourceLocation loc,
SmallVectorImpl<char> &buffer,		SmallVectorImpl<char> &buffer,
const SourceManager &SourceMgr,		const SourceManager &SourceMgr,
const LangOptions &LangOpts,		const LangOptions &LangOpts,
bool *invalid = nullptr);		bool *invalid = nullptr);

/// MeasureTokenLength - Relex the token at the specified location and return		/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file. If the token needs cleaning (e.g.		/// its length in bytes in the input file. If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes		/// includes a trigraph or an escaped newline) then this count includes bytes
/// that are part of that.		/// that are part of that.
static unsigned MeasureTokenLength(SourceLocation Loc,		static unsigned MeasureTokenLength(SourceLocation Loc,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts);		const LangOptions &LangOpts);

/// \brief Relex the token at the specified location.		/// \brief Relex the token at the specified location.
/// \returns true if there was a failure, false on success.		/// \returns true if there was a failure, false on success.
static bool getRawToken(SourceLocation Loc, Token &Result,		static bool getRawToken(SourceLocation Loc, Token &Result,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts,		const LangOptions &LangOpts,
bool IgnoreWhiteSpace = false);		bool IgnoreWhiteSpace = false);

/// \brief Given a location any where in a source buffer, find the location		/// \brief Given a location any where in a source buffer, find the location
/// that corresponds to the beginning of the token in which the original		/// that corresponds to the beginning of the token in which the original
/// source location lands.		/// source location lands.
static SourceLocation GetBeginningOfToken(SourceLocation Loc,		static SourceLocation GetBeginningOfToken(SourceLocation Loc,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts);		const LangOptions &LangOpts);

/// AdvanceToTokenCharacter - If the current SourceLocation specifies a		/// AdvanceToTokenCharacter - If the current SourceLocation specifies a
/// location at the start of a token, return a new location that specifies a		/// location at the start of a token, return a new location that specifies a
/// character within the token. This handles trigraphs and escaped newlines.		/// character within the token. This handles trigraphs and escaped newlines.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,		static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
unsigned Character,		unsigned Character,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts);		const LangOptions &LangOpts);

/// \brief Computes the source location just past the end of the		/// \brief Computes the source location just past the end of the
/// token at this source location.		/// token at this source location.
///		///
/// This routine can be used to produce a source location that		/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and		/// points just past the end of the token referenced by \p Loc, and
/// is generally used when a diagnostic needs to point just after a		/// is generally used when a diagnostic needs to point just after a
/// token where it expected something different that it received. If		/// token where it expected something different that it received. If
/// the returned source location would not be meaningful (e.g., if		/// the returned source location would not be meaningful (e.g., if
▲ Show 20 Lines • Show All 330 Lines • ▼ Show 20 Lines	private:
bool LexEndOfFile (Token &Result, const char *CurPtr);		bool LexEndOfFile (Token &Result, const char *CurPtr);
bool SkipWhitespace (Token &Result, const char *CurPtr,		bool SkipWhitespace (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);		bool &TokAtPhysicalStartOfLine);
bool SkipLineComment (Token &Result, const char *CurPtr,		bool SkipLineComment (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);		bool &TokAtPhysicalStartOfLine);
bool SkipBlockComment (Token &Result, const char *CurPtr,		bool SkipBlockComment (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);		bool &TokAtPhysicalStartOfLine);
bool SaveLineComment (Token &Result, const char *CurPtr);		bool SaveLineComment (Token &Result, const char *CurPtr);

bool IsStartOfConflictMarker(const char *CurPtr);		bool IsStartOfConflictMarker(const char *CurPtr);
bool HandleEndOfConflictMarker(const char *CurPtr);		bool HandleEndOfConflictMarker(const char *CurPtr);

bool lexEditorPlaceholder(Token &Result, const char *CurPtr);		bool lexEditorPlaceholder(Token &Result, const char *CurPtr);

bool isCodeCompletionPoint(const char *CurPtr) const;		bool isCodeCompletionPoint(const char *CurPtr) const;
void cutOffLexing() { BufferPtr = BufferEnd; }		void cutOffLexing() { BufferPtr = BufferEnd; }

▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

lib/Lex/Lexer.cpp

Show First 20 Lines • Show All 203 Lines • ▼ Show 20 Lines	Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
// return an EOD token.		// return an EOD token.
L->ParsingPreprocessorDirective = true;		L->ParsingPreprocessorDirective = true;

// This lexer really is for _Pragma.		// This lexer really is for _Pragma.
L->Is_PragmaLexer = true;		L->Is_PragmaLexer = true;
return L;		return L;
}		}

/// Stringify - Convert the specified string into a C string, with surrounding		/// StringifyImpl - Implementation of Stringify functions. Convert the
/// ""'s, and with escaped \ and " characters.		/// specified string into a C string by i) escaping '\' and " characters and
		/// ii) replacing newline character(s) with "\n".
		jkorous-apple (inline comment, unsubmitted, not done): I am not sure I understand this correctly, but wouldn't it be more precise if these literals were escaped? That is: "... escaping '\' ..." -> "... escaping '\\' ..." and "... with "\n" ..." -> "... with "\\n" ...". Alternatively, we could use the raw string literals R"(\)" and R"(\n)".
		// Escapes Str in place so that it can be embedded in a C string or
		// character literal delimited by Quote:
		//   * every '\' and every Quote character gets a '\' inserted before it;
		//   * each line break ('\n', '\r', or the two-character pairs "\r\n" and
		//     "\n\r") is replaced by the two characters '\' and 'n'.
		// T only needs size(), begin(), operator[], insert(iterator, char) and a
		// size_type member.
		template <typename T>
		static void StringifyImpl(T &Str, char Quote) {
		  // Index with the container's own size_type rather than unsigned so that
		  // size() is never narrowed (review note from jkorous-apple: both
		  // supported argument types provide this member type).
		  typename T::size_type i = 0, e = Str.size();
		  while (i < e) {
		    if (Str[i] == '\\' || Str[i] == Quote) {
		      // Put the escaping backslash in front and step over both characters.
		      Str.insert(Str.begin() + i, '\\');
		      i += 2;
		      ++e;
		    } else if (Str[i] == '\n' || Str[i] == '\r') {
		      // A line break is one character, or two when it is immediately
		      // followed by the *other* line-break character ("\r\n" / "\n\r").
		      const bool IsPair = (i + 1 < e) &&
		                          (Str[i + 1] == '\n' || Str[i + 1] == '\r') &&
		                          Str[i] != Str[i + 1];
		      // Overwrite in place instead of erase + two inserts, so the tail of
		      // the buffer is shifted at most once per line break (review note
		      // from jkorous-apple about repeated moves of trailing characters).
		      Str[i] = '\\';
		      if (IsPair) {
		        Str[i + 1] = 'n';
		      } else {
		        Str.insert(Str.begin() + i + 1, 'n');
		        ++e;
		      }
		      i += 2;
		    } else {
		      ++i;
		    }
		  }
		}

std::string Lexer::Stringify(StringRef Str, bool Charify) {		std::string Lexer::Stringify(StringRef Str, bool Charify) {
std::string Result = Str;		std::string Result = Str;
char Quote = Charify ? '\'' : '"';		char Quote = Charify ? '\'' : '"';
for (unsigned i = 0, e = Result.size(); i != e; ++i) {		StringifyImpl(Result, Quote);
if (Result[i] == '\\' \|\| Result[i] == Quote) {
Result.insert(Result.begin()+i, '\\');
++i; ++e;
}
}
return Result;		return Result;
}		}
		vsapsai (inline comment, unsubmitted, not done): `getEscapedNewLineSize` mentions "P[-1] is known to be a "\" or a trigraph equivalent on entry to this function." Is this precondition correct in this case? Also, `std::string::substr` creates a copy of a substring; that is inefficient in the loop, and it looks like you don't really need `std::string` here anyway.

/// Stringify - Convert the specified string into a C string by escaping '\'
/// and " characters. This does not add surrounding ""'s to the string.
void Lexer::Stringify(SmallVectorImpl<char> &Str) {		void Lexer::Stringify(SmallVectorImpl<char> &Str) {
for (unsigned i = 0, e = Str.size(); i != e; ++i) {		StringifyImpl(Str, '"');
if (Str[i] == '\\' \|\| Str[i] == '"') {
Str.insert(Str.begin()+i, '\\');
++i; ++e;
}
}
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Token Spelling		// Token Spelling
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// \brief Slow case of getSpelling. Extract the characters comprising the		/// \brief Slow case of getSpelling. Extract the characters comprising the
/// spelling of this token from the provided input buffer.		/// spelling of this token from the provided input buffer.
▲ Show 20 Lines • Show All 119 Lines • ▼ Show 20 Lines
/// to allocate enough space for the token, which is guaranteed to be at least		/// to allocate enough space for the token, which is guaranteed to be at least
/// Tok.getLength() bytes long. The actual length of the token is returned.		/// Tok.getLength() bytes long. The actual length of the token is returned.
///		///
/// Note that this method may do two possible things: it may either fill in		/// Note that this method may do two possible things: it may either fill in
/// the buffer specified with characters, or it may change the input pointer		/// the buffer specified with characters, or it may change the input pointer
/// to point to a constant buffer with the data already in it (avoiding a		/// to point to a constant buffer with the data already in it (avoiding a
/// copy). The caller is not allowed to modify the returned buffer pointer		/// copy). The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.		/// if an internal buffer is returned.
unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,		unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
const SourceManager &SourceMgr,		const SourceManager &SourceMgr,
const LangOptions &LangOpts, bool *Invalid) {		const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");		assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");

const char *TokStart = nullptr;		const char *TokStart = nullptr;
// NOTE: this has to be checked before testing for an IdentifierInfo.		// NOTE: this has to be checked before testing for an IdentifierInfo.
if (Tok.is(tok::raw_identifier))		if (Tok.is(tok::raw_identifier))
TokStart = Tok.getRawIdentifier().data();		TokStart = Tok.getRawIdentifier().data();
▲ Show 20 Lines • Show All 208 Lines • ▼ Show 20 Lines	PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
do {		do {
TheLexer.LexFromRawLexer(TheTok);		TheLexer.LexFromRawLexer(TheTok);

if (InPreprocessorDirective) {		if (InPreprocessorDirective) {
// If we've hit the end of the file, we're done.		// If we've hit the end of the file, we're done.
if (TheTok.getKind() == tok::eof) {		if (TheTok.getKind() == tok::eof) {
break;		break;
}		}

// If we haven't hit the end of the preprocessor directive, skip this		// If we haven't hit the end of the preprocessor directive, skip this
// token.		// token.
if (!TheTok.isAtStartOfLine())		if (!TheTok.isAtStartOfLine())
continue;		continue;

// We've passed the end of the preprocessor directive, and will look		// We've passed the end of the preprocessor directive, and will look
// at this token again below.		// at this token again below.
InPreprocessorDirective = false;		InPreprocessorDirective = false;
}		}

// Keep track of the # of lines in the preamble.		// Keep track of the # of lines in the preamble.
if (TheTok.isAtStartOfLine()) {		if (TheTok.isAtStartOfLine()) {
unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;		unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;

// If we were asked to limit the number of lines in the preamble,		// If we were asked to limit the number of lines in the preamble,
// and we're about to exceed that limit, we're done.		// and we're about to exceed that limit, we're done.
if (MaxLineOffset && TokOffset >= MaxLineOffset)		if (MaxLineOffset && TokOffset >= MaxLineOffset)
break;		break;
}		}

// Comments are okay; skip over them.		// Comments are okay; skip over them.
if (TheTok.getKind() == tok::comment) {		if (TheTok.getKind() == tok::comment) {
if (ActiveCommentLoc.isInvalid())		if (ActiveCommentLoc.isInvalid())
ActiveCommentLoc = TheTok.getLocation();		ActiveCommentLoc = TheTok.getLocation();
continue;		continue;
}		}

if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {		if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
// This is the start of a preprocessor directive.		// This is the start of a preprocessor directive.
Token HashTok = TheTok;		Token HashTok = TheTok;
InPreprocessorDirective = true;		InPreprocessorDirective = true;
ActiveCommentLoc = SourceLocation();		ActiveCommentLoc = SourceLocation();

// Figure out which directive this is. Since we're lexing raw tokens,		// Figure out which directive this is. Since we're lexing raw tokens,
// we don't have an identifier table available. Instead, just look at		// we don't have an identifier table available. Instead, just look at
// the raw identifier to recognize and categorize preprocessor directives.		// the raw identifier to recognize and categorize preprocessor directives.
TheLexer.LexFromRawLexer(TheTok);		TheLexer.LexFromRawLexer(TheTok);
if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {		if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
StringRef Keyword = TheTok.getRawIdentifier();		StringRef Keyword = TheTok.getRawIdentifier();
PreambleDirectiveKind PDK		PreambleDirectiveKind PDK
= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)		= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
Show All 23 Lines	if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
case PDK_Skipped:		case PDK_Skipped:
continue;		continue;

case PDK_Unknown:		case PDK_Unknown:
// We don't know what this directive is; stop at the '#'.		// We don't know what this directive is; stop at the '#'.
break;		break;
}		}
}		}

// We only end up here if we didn't recognize the preprocessor		// We only end up here if we didn't recognize the preprocessor
// directive or it was one that can't occur in the preamble at this		// directive or it was one that can't occur in the preamble at this
// point. Roll back the current token to the location of the '#'.		// point. Roll back the current token to the location of the '#'.
InPreprocessorDirective = false;		InPreprocessorDirective = false;
TheTok = HashTok;		TheTok = HashTok;
}		}

// We hit a token that we don't recognize as being in the		// We hit a token that we don't recognize as being in the
// "preprocessing only" part of the file, so we're no longer in		// "preprocessing only" part of the file, so we're no longer in
// the preamble.		// the preamble.
break;		break;
} while (true);		} while (true);

SourceLocation End;		SourceLocation End;
if (ActiveCommentLoc.isValid())		if (ActiveCommentLoc.isValid())
End = ActiveCommentLoc; // don't truncate a decl comment.		End = ActiveCommentLoc; // don't truncate a decl comment.
else		else
End = TheTok.getLocation();		End = TheTok.getLocation();

return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),		return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
TheTok.isAtStartOfLine());		TheTok.isAtStartOfLine());
}		}

/// AdvanceToTokenCharacter - Given a location that specifies the start of a		/// AdvanceToTokenCharacter - Given a location that specifies the start of a
/// token, return a new location that specifies a character within the token.		/// token, return a new location that specifies a character within the token.
SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,		SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
unsigned CharNo,		unsigned CharNo,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts) {		const LangOptions &LangOpts) {
// Figure out how many physical characters away the specified expansion		// Figure out how many physical characters away the specified expansion
// character is. This needs to take into consideration newlines and		// character is. This needs to take into consideration newlines and
// trigraphs.		// trigraphs.
bool Invalid = false;		bool Invalid = false;
const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);		const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);

// If they request the first char of the token, we're trivially done.		// If they request the first char of the token, we're trivially done.
if (Invalid \|\| (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))		if (Invalid \|\| (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
return TokStart;		return TokStart;

unsigned PhysOffset = 0;		unsigned PhysOffset = 0;

// The usual case is that tokens don't contain anything interesting. Skip		// The usual case is that tokens don't contain anything interesting. Skip
// over the uninteresting characters. If a token only consists of simple		// over the uninteresting characters. If a token only consists of simple
// chars, this method is extremely fast.		// chars, this method is extremely fast.
while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {		while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
if (CharNo == 0)		if (CharNo == 0)
return TokStart.getLocWithOffset(PhysOffset);		return TokStart.getLocWithOffset(PhysOffset);
++TokPtr;		++TokPtr;
--CharNo;		--CharNo;
++PhysOffset;		++PhysOffset;
}		}

// If we have a character that may be a trigraph or escaped newline, use a		// If we have a character that may be a trigraph or escaped newline, use a
// lexer to parse it correctly.		// lexer to parse it correctly.
for (; CharNo; --CharNo) {		for (; CharNo; --CharNo) {
unsigned Size;		unsigned Size;
Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);		Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
TokPtr += Size;		TokPtr += Size;
PhysOffset += Size;		PhysOffset += Size;
}		}

// Final detail: if we end up on an escaped newline, we want to return the		// Final detail: if we end up on an escaped newline, we want to return the
// location of the actual byte of the token. For example foo\<newline>bar		// location of the actual byte of the token. For example foo\<newline>bar
// advanced by 3 should return the location of b, not of \\. One compounding		// advanced by 3 should return the location of b, not of \\. One compounding
// detail of this is that the escape may be made by a trigraph.		// detail of this is that the escape may be made by a trigraph.
if (!Lexer::isObviouslySimpleCharacter(*TokPtr))		if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;		PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;

return TokStart.getLocWithOffset(PhysOffset);		return TokStart.getLocWithOffset(PhysOffset);
}		}

/// \brief Computes the source location just past the end of the		/// \brief Computes the source location just past the end of the
/// token at this source location.		/// token at this source location.
///		///
/// This routine can be used to produce a source location that		/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and		/// points just past the end of the token referenced by \p Loc, and
Show All 18 Lines	if (Offset > 0 \|\| !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
return SourceLocation(); // Points inside the macro expansion.		return SourceLocation(); // Points inside the macro expansion.
}		}

unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);		unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
if (Len > Offset)		if (Len > Offset)
Len = Len - Offset;		Len = Len - Offset;
else		else
return Loc;		return Loc;

return Loc.getLocWithOffset(Len);		return Loc.getLocWithOffset(Len);
}		}

/// \brief Returns true if the given MacroID location points at the first		/// \brief Returns true if the given MacroID location points at the first
/// token of the macro expansion.		/// token of the macro expansion.
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,		bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
const SourceManager &SM,		const SourceManager &SM,
const LangOptions &LangOpts,		const LangOptions &LangOpts,
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines	while (true) {
const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);		const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();		const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
Loc = Expansion.getExpansionLocStart();		Loc = Expansion.getExpansionLocStart();
if (!Expansion.isMacroArgExpansion())		if (!Expansion.isMacroArgExpansion())
break;		break;

// For macro arguments we need to check that the argument did not come		// For macro arguments we need to check that the argument did not come
// from an inner macro, e.g: "MAC1( MAC2(foo) )"		// from an inner macro, e.g: "MAC1( MAC2(foo) )"

// Loc points to the argument id of the macro definition, move to the		// Loc points to the argument id of the macro definition, move to the
// macro expansion.		// macro expansion.
Loc = SM.getImmediateExpansionRange(Loc).first;		Loc = SM.getImmediateExpansionRange(Loc).first;
SourceLocation SpellLoc = Expansion.getSpellingLoc();		SourceLocation SpellLoc = Expansion.getSpellingLoc();
if (SpellLoc.isFileID())		if (SpellLoc.isFileID())
break; // No inner macro.		break; // No inner macro.

// If spelling location resides in the same FileID as macro expansion		// If spelling location resides in the same FileID as macro expansion
▲ Show 20 Lines • Show All 813 Lines • ▼ Show 20 Lines	Diag(BufferPtr, getLangOpts().CPlusPlus
: diag::warn_c99_compat_unicode_literal);		: diag::warn_c99_compat_unicode_literal);

char C = getAndAdvanceChar(CurPtr, Result);		char C = getAndAdvanceChar(CurPtr, Result);
while (C != '"') {		while (C != '"') {
// Skip escaped characters. Escaped newlines will already be processed by		// Skip escaped characters. Escaped newlines will already be processed by
// getAndAdvanceChar.		// getAndAdvanceChar.
if (C == '\\')		if (C == '\\')
C = getAndAdvanceChar(CurPtr, Result);		C = getAndAdvanceChar(CurPtr, Result);

if (C == '\n' \|\| C == '\r' \|\| // Newline.		if (C == '\n' \|\| C == '\r' \|\| // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.		(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)		if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;		Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
FormTokenWithChars(Result, CurPtr-1, tok::unknown);		FormTokenWithChars(Result, CurPtr-1, tok::unknown);
return true;		return true;
}		}

if (C == 0) {		if (C == 0) {
if (isCodeCompletionPoint(CurPtr-1)) {		if (isCodeCompletionPoint(CurPtr-1)) {
PP->CodeCompleteNaturalLanguage();		PP->CodeCompleteNaturalLanguage();
FormTokenWithChars(Result, CurPtr-1, tok::unknown);		FormTokenWithChars(Result, CurPtr-1, tok::unknown);
cutOffLexing();		cutOffLexing();
return true;		return true;
}		}

▲ Show 20 Lines • Show All 412 Lines • ▼ Show 20 Lines	if (!ParsingPreprocessorDirective \|\| LexingRawMode)
return true;		return true;

// If this Line-style comment is in a macro definition, transmogrify it into		// If this Line-style comment is in a macro definition, transmogrify it into
// a C-style block comment.		// a C-style block comment.
bool Invalid = false;		bool Invalid = false;
std::string Spelling = PP->getSpelling(Result, &Invalid);		std::string Spelling = PP->getSpelling(Result, &Invalid);
if (Invalid)		if (Invalid)
return true;		return true;

assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");		assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
Spelling[1] = '*'; // Change prefix to "/*".		Spelling[1] = '*'; // Change prefix to "/*".
Spelling += "*/"; // add suffix.		Spelling += "*/"; // add suffix.

Result.setKind(tok::comment);		Result.setKind(tok::comment);
PP->CreateString(Spelling, Result,		PP->CreateString(Spelling, Result,
Result.getLocation(), Result.getLocation());		Result.getLocation(), Result.getLocation());
return true;		return true;
▲ Show 20 Lines • Show All 309 Lines • ▼ Show 20 Lines	if (ParsingPreprocessorDirective) {
// Update the location of token as well as BufferPtr.		// Update the location of token as well as BufferPtr.
FormTokenWithChars(Result, CurPtr, tok::eod);		FormTokenWithChars(Result, CurPtr, tok::eod);

// Restore comment saving mode, in case it was disabled for directive.		// Restore comment saving mode, in case it was disabled for directive.
if (PP)		if (PP)
resetExtendedTokenMode();		resetExtendedTokenMode();
return true; // Have a token.		return true; // Have a token.
}		}

// If we are in raw mode, return this event as an EOF token. Let the caller		// If we are in raw mode, return this event as an EOF token. Let the caller
// that put us in raw mode handle the event.		// that put us in raw mode handle the event.
if (isLexingRawMode()) {		if (isLexingRawMode()) {
Result.startToken();		Result.startToken();
BufferPtr = BufferEnd;		BufferPtr = BufferEnd;
FormTokenWithChars(Result, BufferEnd, tok::eof);		FormTokenWithChars(Result, BufferEnd, tok::eof);
return true;		return true;
}		}

if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {		if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
PP->setRecordedPreambleConditionalStack(ConditionalStack);		PP->setRecordedPreambleConditionalStack(ConditionalStack);
ConditionalStack.clear();		ConditionalStack.clear();
}		}

// Issue diagnostics for unterminated #if and missing newline.		// Issue diagnostics for unterminated #if and missing newline.

// If we are in a #if directive, emit an error.		// If we are in a #if directive, emit an error.
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines
/// control conflict marker like '<<<<<<<', recognize it as such, emit an error		/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
/// and recover nicely. This returns true if it is a conflict marker and false		/// and recover nicely. This returns true if it is a conflict marker and false
/// if not.		/// if not.
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {		bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.		// Only a conflict marker if it starts at the beginning of a line.
if (CurPtr != BufferStart &&		if (CurPtr != BufferStart &&
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')		CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;		return false;

// Check to see if we have <<<<<<< or >>>>.		// Check to see if we have <<<<<<< or >>>>.
if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&		if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&
!StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))		!StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))
return false;		return false;

// If we have a situation where we don't care about conflict markers, ignore		// If we have a situation where we don't care about conflict markers, ignore
// it.		// it.
if (CurrentConflictMarkerState \|\| isLexingRawMode())		if (CurrentConflictMarkerState \|\| isLexingRawMode())
return false;		return false;

ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;		ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;

// Check to see if there is an ending marker somewhere in the buffer at the		// Check to see if there is an ending marker somewhere in the buffer at the
// start of a line to terminate this conflict marker.		// start of a line to terminate this conflict marker.
if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {		if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {
// We found a match. We are really in a conflict marker.		// We found a match. We are really in a conflict marker.
// Diagnose this, and ignore to the end of line.		// Diagnose this, and ignore to the end of line.
Diag(CurPtr, diag::err_conflict_marker);		Diag(CurPtr, diag::err_conflict_marker);
CurrentConflictMarkerState = Kind;		CurrentConflictMarkerState = Kind;

// Skip ahead to the end of line. We know this exists because the		// Skip ahead to the end of line. We know this exists because the
// end-of-conflict marker starts with \r or \n.		// end-of-conflict marker starts with \r or \n.
while (CurPtr != '\r' && CurPtr != '\n') {		while (CurPtr != '\r' && CurPtr != '\n') {
assert(CurPtr != BufferEnd && "Didn't find end of line");		assert(CurPtr != BufferEnd && "Didn't find end of line");
++CurPtr;		++CurPtr;
}		}
BufferPtr = CurPtr;		BufferPtr = CurPtr;
return true;		return true;
}		}

// No end of conflict marker found.		// No end of conflict marker found.
return false;		return false;
}		}

/// HandleEndOfConflictMarker - If this is a '====' or '\|\|\|\|' or '>>>>', or if		/// HandleEndOfConflictMarker - If this is a '====' or '\|\|\|\|' or '>>>>', or if
/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it		/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it
/// is the end of a conflict marker. Handle it by ignoring up until the end of		/// is the end of a conflict marker. Handle it by ignoring up until the end of
/// the line. This returns true if it is a conflict marker and false if not.		/// the line. This returns true if it is a conflict marker and false if not.
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {		bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
// Only a conflict marker if it starts at the beginning of a line.		// Only a conflict marker if it starts at the beginning of a line.
if (CurPtr != BufferStart &&		if (CurPtr != BufferStart &&
CurPtr[-1] != '\n' && CurPtr[-1] != '\r')		CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
return false;		return false;

// If we have a situation where we don't care about conflict markers, ignore		// If we have a situation where we don't care about conflict markers, ignore
// it.		// it.
if (!CurrentConflictMarkerState \|\| isLexingRawMode())		if (!CurrentConflictMarkerState \|\| isLexingRawMode())
return false;		return false;

// Check to see if we have the marker (4 characters in a row).		// Check to see if we have the marker (4 characters in a row).
for (unsigned i = 1; i != 4; ++i)		for (unsigned i = 1; i != 4; ++i)
if (CurPtr[i] != CurPtr[0])		if (CurPtr[i] != CurPtr[0])
return false;		return false;

// If we do have it, search for the end of the conflict marker. This could		// If we do have it, search for the end of the conflict marker. This could
// fail if it got skipped with a '#if 0' or something. Note that CurPtr might		// fail if it got skipped with a '#if 0' or something. Note that CurPtr might
// be the end of conflict marker.		// be the end of conflict marker.
if (const char *End = FindConflictEnd(CurPtr, BufferEnd,		if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
CurrentConflictMarkerState)) {		CurrentConflictMarkerState)) {
CurPtr = End;		CurPtr = End;

// Skip ahead to the end of line.		// Skip ahead to the end of line.
while (CurPtr != BufferEnd && CurPtr != '\r' && CurPtr != '\n')		while (CurPtr != BufferEnd && CurPtr != '\r' && CurPtr != '\n')
++CurPtr;		++CurPtr;

BufferPtr = CurPtr;		BufferPtr = CurPtr;

// No longer in the conflict marker.		// No longer in the conflict marker.
CurrentConflictMarkerState = CMK_None;		CurrentConflictMarkerState = CMK_None;
return true;		return true;
}		}

return false;		return false;
}		}

static const char *findPlaceholderEnd(const char *CurPtr,		static const char *findPlaceholderEnd(const char *CurPtr,
const char *BufferEnd) {		const char *BufferEnd) {
if (CurPtr == BufferEnd)		if (CurPtr == BufferEnd)
return nullptr;		return nullptr;
BufferEnd -= 1; // Scan until the second last character.		BufferEnd -= 1; // Scan until the second last character.
▲ Show 20 Lines • Show All 292 Lines • ▼ Show 20 Lines	if (!isLexingRawMode())
Diag(CurPtr-1, diag::null_in_file);		Diag(CurPtr-1, diag::null_in_file);
Result.setFlag(Token::LeadingSpace);		Result.setFlag(Token::LeadingSpace);
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))		if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
return true; // KeepWhitespaceMode		return true; // KeepWhitespaceMode

// We know the lexer hasn't changed, so just try again with this lexer.		// We know the lexer hasn't changed, so just try again with this lexer.
// (We manually eliminate the tail call to avoid recursion.)		// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;		goto LexNextToken;

case 26: // DOS & CP/M EOF: "^Z".		case 26: // DOS & CP/M EOF: "^Z".
// If we're in Microsoft extensions mode, treat this as end of file.		// If we're in Microsoft extensions mode, treat this as end of file.
if (LangOpts.MicrosoftExt) {		if (LangOpts.MicrosoftExt) {
if (!isLexingRawMode())		if (!isLexingRawMode())
Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);		Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
return LexEndOfFile(Result, CurPtr-1);		return LexEndOfFile(Result, CurPtr-1);
}		}

// If Microsoft extensions are disabled, this is just random garbage.		// If Microsoft extensions are disabled, this is just random garbage.
Kind = tok::unknown;		Kind = tok::unknown;
break;		break;

case '\r':		case '\r':
if (CurPtr[0] == '\n')		if (CurPtr[0] == '\n')
Char = getAndAdvanceChar(CurPtr, Result);		Char = getAndAdvanceChar(CurPtr, Result);
LLVM_FALLTHROUGH;		LLVM_FALLTHROUGH;
case '\n':		case '\n':
// If we are inside a preprocessor directive and we see the end of line,		// If we are inside a preprocessor directive and we see the end of line,
// we know we are done with the directive, so return an EOD token.		// we know we are done with the directive, so return an EOD token.
if (ParsingPreprocessorDirective) {		if (ParsingPreprocessorDirective) {
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
return true; // There is a token to return.		return true; // There is a token to return.
goto SkipIgnoredUnits;		goto SkipIgnoredUnits;
} else if (isHorizontalWhitespace(*CurPtr)) {		} else if (isHorizontalWhitespace(*CurPtr)) {
goto SkipHorizontalWhitespace;		goto SkipHorizontalWhitespace;
}		}
// We only saw whitespace, so just try again with this lexer.		// We only saw whitespace, so just try again with this lexer.
// (We manually eliminate the tail call to avoid recursion.)		// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;		goto LexNextToken;

// C99 6.4.4.1: Integer Constants.		// C99 6.4.4.1: Integer Constants.
// C99 6.4.4.2: Floating Constants.		// C99 6.4.4.2: Floating Constants.
case '0': case '1': case '2': case '3': case '4':		case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':		case '5': case '6': case '7': case '8': case '9':
// Notify MIOpt that we read a non-whitespace/non-comment token.		// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();		MIOpt.ReadToken();
return LexNumericConstant(Result, CurPtr);		return LexNumericConstant(Result, CurPtr);

▲ Show 20 Lines • Show All 482 Lines • ▼ Show 20 Lines	case ';':
Kind = tok::semi;		Kind = tok::semi;
break;		break;
case '=':		case '=':
Char = getCharAndSize(CurPtr, SizeTmp);		Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '=') {		if (Char == '=') {
// If this is '====' and we're in a conflict marker, ignore it.		// If this is '====' and we're in a conflict marker, ignore it.
if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))		if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
goto LexNextToken;		goto LexNextToken;

Kind = tok::equalequal;		Kind = tok::equalequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);		CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {		} else {
Kind = tok::equal;		Kind = tok::equal;
}		}
break;		break;
case ',':		case ',':
Kind = tok::comma;		Kind = tok::comma;
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	if (Status == llvm::conversionOK) {
return true; // KeepWhitespaceMode		return true; // KeepWhitespaceMode

// We only saw whitespace, so just try again with this lexer.		// We only saw whitespace, so just try again with this lexer.
// (We manually eliminate the tail call to avoid recursion.)		// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;		goto LexNextToken;
}		}
return LexUnicode(Result, CodePoint, CurPtr);		return LexUnicode(Result, CodePoint, CurPtr);
}		}

if (isLexingRawMode() || ParsingPreprocessorDirective ||		if (isLexingRawMode() || ParsingPreprocessorDirective ||
PP->isPreprocessedOutput()) {		PP->isPreprocessedOutput()) {
++CurPtr;		++CurPtr;
Kind = tok::unknown;		Kind = tok::unknown;
break;		break;
}		}

// Non-ASCII characters tend to creep into source code unintentionally.		// Non-ASCII characters tend to creep into source code unintentionally.
Show All 34 Lines

test/Preprocessor/macro_raw_string.cpp

This file was added.

				// RUN: %clang_cc1 -E -std=c++11 %s -o %t
				// RUN: %clang_cc1 %t

				#define FOO(str) foo(#str)

				extern void foo(const char *str);

				// Passes a raw string literal that spans two source lines to FOO, which
				// stringizes its argument with #str and forwards the result to foo().
				void bar() {
				FOO(R"(foo
				bar)");
				}

unittests/Lex/LexerTest.cpp

Show All 31 Lines
// The test fixture.		// The test fixture.
class LexerTest : public ::testing::Test {		class LexerTest : public ::testing::Test {
protected:		protected:
LexerTest()		LexerTest()
: FileMgr(FileMgrOpts),		: FileMgr(FileMgrOpts),
DiagID(new DiagnosticIDs()),		DiagID(new DiagnosticIDs()),
Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),		Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
SourceMgr(Diags, FileMgr),		SourceMgr(Diags, FileMgr),
TargetOpts(new TargetOptions)		TargetOpts(new TargetOptions)
{		{
TargetOpts->Triple = "x86_64-apple-darwin11.1.0";		TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);		Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
}		}

std::unique_ptr<Preprocessor> CreatePP(StringRef Source,		std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
TrivialModuleLoader &ModLoader) {		TrivialModuleLoader &ModLoader) {
std::unique_ptr<llvm::MemoryBuffer> Buf =		std::unique_ptr<llvm::MemoryBuffer> Buf =
▲ Show 20 Lines • Show All 424 Lines • ▼ Show 20 Lines	TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
}		}
}		}

TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
  // The input is a line comment whose last characters are a backslash
  // followed by a newline; lexing it is expected to produce no tokens.
  const std::vector<Token> Tokens = Lex(" // \\\n");
  EXPECT_TRUE(Tokens.empty());
}

		// Exercises both Lexer::Stringify overloads on raw string literals that
		// contain embedded newlines, double quotes and backslashes. Each input is
		// held twice: once as a std::string and once as a SmallString<128>.
		// NOTE(review): the raw string literals below span several source lines, so
		// any whitespace at the start of their continuation lines is part of the
		// data; confirm the indentation in this copy matches the spacing in the
		// expected strings.
		TEST_F(LexerTest, StringizingRasString) {
		// For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
		std::string String1 = R"(foo
		{"bar":[]}
		baz)";
		// For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
		SmallString<128> String2;
		String2 += String1.c_str();

		// Corner cases.
		std::string String3 = R"(\
		\n
		\\n
		\\)";
		SmallString<128> String4;
		String4 += String3.c_str();
		std::string String5 = R"(a\


		\\b)";
		SmallString<128> String6;
		String6 += String5.c_str();

		// The std::string inputs go through the overload that returns a new
		// string; the SmallString inputs are passed to the void overload and
		// inspected afterwards.
		String1 = Lexer::Stringify(StringRef(String1));
		Lexer::Stringify(String2);
		String3 = Lexer::Stringify(StringRef(String3));
		Lexer::Stringify(String4);
		String5 = Lexer::Stringify(StringRef(String5));
		Lexer::Stringify(String6);

		// Both overloads must give the same text for the same input. In the
		// expected strings, newlines appear as the two characters '\' 'n', and
		// double quotes and backslashes each gain a leading backslash.
		EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
		EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
		EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)");
		EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)");
		EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)");
		EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)");
		}

} // anonymous namespace		} // anonymous namespace

This is an archive of the discontinued LLVM Phabricator instance.

Stringizing raw string literals containing newline
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 123658

include/clang/Lex/Lexer.h

lib/Lex/Lexer.cpp

test/Preprocessor/macro_raw_string.cpp

unittests/Lex/LexerTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

Stringizing raw string literals containing newlineClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 123658

include/clang/Lex/Lexer.h

lib/Lex/Lexer.cpp

test/Preprocessor/macro_raw_string.cpp

unittests/Lex/LexerTest.cpp

Stringizing raw string literals containing newline
ClosedPublic