diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -108,16 +108,19 @@
   switch (CurChar) {
   default:
     return (unsigned char)CurChar;
+
   case 0: {
-    // A nul character in the stream is either the end of the current buffer or
-    // a random nul in the file. Disambiguate that here.
-    if (CurPtr-1 != CurBuf.end())
-      return 0; // Just whitespace.
-
-    // Otherwise, return end of file.
-    --CurPtr; // Another call to lex will return EOF again.
-    return EOF;
+    // A NUL character is either the one at CurBuf.end(), which means end of
+    // file, or a stray NUL in the buffer, which is reported and read as ' '.
+    if (CurPtr - 1 == CurBuf.end()) {
+      --CurPtr; // Arrange for another call to return EOF again.
+      return EOF;
+    }
+    PrintError(getLoc(),
+               "NUL character is invalid in source; treated as space");
+    return ' ';
   }
+
   case '\n':
   case '\r':
     // Handle the newline character by ignoring it and incrementing the line
@@ -197,7 +200,6 @@
     PrintFatalError("getNextChar() must never return '\r'");
     return tgtok::Error;
 
-  case 0:
   case ' ':
   case '\t':
     // Ignore whitespace.
@@ -415,22 +417,12 @@
   return false;
 }
 
+/// SkipBCPLComment - Advance CurPtr to the CR or LF that ends a // comment,
+/// or to the end of the buffer when no CR or LF follows.
 void TGLexer::SkipBCPLComment() {
   ++CurPtr; // skip the second slash.
-  while (true) {
-    switch (*CurPtr) {
-    case '\n':
-    case '\r':
-      return; // Newline is end of comment.
-    case 0:
-      // If this is the end of the buffer, end the comment.
-      if (CurPtr == CurBuf.end())
-        return;
-      break;
-    }
-    // Otherwise, skip the character.
-    ++CurPtr;
-  }
+  auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
+  CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
 }
 
 /// SkipCComment - This skips C-style /**/ comments.
The only difference from C
diff --git a/llvm/test/TableGen/nul-char.td b/llvm/test/TableGen/nul-char.td
new file mode 100644
--- /dev/null
+++ b/llvm/test/TableGen/nul-char.td
@@ -0,0 +1,30 @@
+// RUN: cat %s | tr '@' '\0' > %t
+// RUN: not llvm-tblgen -DERROR1 %t 2>&1 | FileCheck --check-prefix=ERROR1 %s
+
+// This test file checks that NUL is treated as an invalid character.
+// Each at sign is replaced with a NUL before running the test. The
+// substitution uses tr because its octal escape is specified by POSIX,
+// whereas hexadecimal escapes in a sed replacement are an extension.
+
+#ifdef ERROR1
+
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: NUL character is invalid in source; treated as space
+// ERROR1: error: expected ';' after declaration
+
+def Foo@ {
+  int @ ID = 42;
+}
+
+@
+
+// Comment with a NUL @ there. They are ignored in comments.
+
+def Bar {
+  int Biggie = 12345@789;
+}
+
+#endif
+