Index: llvm/lib/TableGen/TGLexer.cpp
===================================================================
--- llvm/lib/TableGen/TGLexer.cpp
+++ llvm/lib/TableGen/TGLexer.cpp
@@ -109,15 +109,16 @@
   default:
     return (unsigned char)CurChar;
   case 0: {
-    // A nul character in the stream is either the end of the current buffer or
-    // a random nul in the file. Disambiguate that here.
-    if (CurPtr-1 != CurBuf.end())
-      return 0; // Just whitespace.
-
-    // Otherwise, return end of file.
-    --CurPtr; // Another call to lex will return EOF again.
-    return EOF;
+    // A NUL character in the stream is either the end of the current buffer or
+    // a spurious NUL in the file. Disambiguate that here.
+    if (CurPtr-1 == CurBuf.end()) {
+      --CurPtr; // Arrange for another call to return EOF again.
+      return EOF;
+    }
+    PrintError(getLoc(),
+               "NUL character is invalid in source; treated as space");
+    return ' ';
   }
   case '\n':
   case '\r':
     // Handle the newline character by ignoring it and incrementing the line
@@ -197,7 +198,6 @@
     PrintFatalError("getNextChar() must never return '\r'");
     return tgtok::Error;
 
-  case 0:
   case ' ':
   case '\t':
     // Ignore whitespace.
@@ -415,24 +415,14 @@
   return false;
 }
 
+/// Skip a BCPL-style (//) comment by advancing to the next CR or LF, or to the
+/// end of the buffer if the comment is not terminated by a newline.
 void TGLexer::SkipBCPLComment() {
   ++CurPtr; // skip the second slash.
-  while (true) {
-    switch (*CurPtr) {
-    case '\n':
-    case '\r':
-      return; // Newline is end of comment.
-    case 0:
-      // If this is the end of the buffer, end the comment.
-      if (CurPtr == CurBuf.end())
-        return;
-      break;
-    }
-    // Otherwise, skip the character.
-    ++CurPtr;
-  }
+  auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
+  CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
 }
 
 /// SkipCComment - This skips C-style /**/ comments. The only difference from C
 /// is that we allow nesting.
 bool TGLexer::SkipCComment() {