Index: docs/TableGen/LangRef.rst =================================================================== --- docs/TableGen/LangRef.rst +++ docs/TableGen/LangRef.rst @@ -33,7 +33,7 @@ ================ TableGen supports BCPL (``// ...``) and nestable C-style (``/* ... */``) -comments. +comments. TableGen also provides simple `Preprocessing Support`_. The following is a listing of the basic punctuation tokens:: @@ -448,3 +448,49 @@ BaseMultiClassList: `MultiClassID` ("," `MultiClassID`)* MultiClassID: `TokIdentifier` MultiClassObject: `Def` | `Defm` | `Let` | `Foreach` + +Preprocessing Support +===================== + +TableGen's embedded preprocessor supports the following directives: + +.. productionlist:: + LineBegin: ^ + LineEnd: "\n" | "\r" | EOF + WhiteSpace: " " | "\t" + CStyleComment: "/*" (.* - "*/") "*/" + BCPLComment: "//" (.* - `LineEnd`) `LineEnd` + WhiteSpaceOrCStyleComment: `WhiteSpace` | `CStyleComment` + WhiteSpaceOrAnyComment: `WhiteSpace` | `CStyleComment` | `BCPLComment` + MacroName: `ualpha` (`ualpha` | "0"..."9")* + PrepDefine: `LineBegin` (`WhiteSpaceOrCStyleComment`)* + : "#define" (`WhiteSpace`)+ `MacroName` + : (`WhiteSpaceOrAnyComment`)* `LineEnd` + PrepIfdef: `LineBegin` (`WhiteSpaceOrCStyleComment`)* + : "#ifdef" (`WhiteSpace`)+ `MacroName` + : (`WhiteSpaceOrAnyComment`)* `LineEnd` + PrepElse: `LineBegin` (`WhiteSpaceOrCStyleComment`)* + : "#else" (`WhiteSpaceOrAnyComment`)* `LineEnd` + PrepEndif: `LineBegin` (`WhiteSpaceOrCStyleComment`)* + : "#endif" (`WhiteSpaceOrAnyComment`)* `LineEnd` + PrepRegContentException: `PrepIfdef` | `PrepElse` | `PrepEndif` | EOF + PrepRegion: .* - `PrepRegContentException` + :| `PrepIfdef` + : (`PrepRegion`)* + : [`PrepElse`] + : (`PrepRegion`)* + : `PrepEndif` + +:token:`PrepRegion` may occur anywhere in a TD file, as long as it matches +the grammar specification. 
+ +:token:`PrepDefine` allows defining a :token:`MacroName` so that any following +:token:`PrepIfdef` - :token:`PrepElse` preprocessing region part and +:token:`PrepIfdef` - :token:`PrepEndif` preprocessing region +are enabled for TableGen tokens parsing. + +A preprocessing region, starting (i.e. having its :token:`PrepIfdef`) in a file, +must end (i.e. have its :token:`PrepEndif`) in the same file. + +A :token:`MacroName` may be defined externally by using ``{ -D }`` +option of TableGen. Index: lib/TableGen/Main.cpp =================================================================== --- lib/TableGen/Main.cpp +++ lib/TableGen/Main.cpp @@ -46,6 +46,10 @@ IncludeDirs("I", cl::desc("Directory of include files"), cl::value_desc("directory"), cl::Prefix); +static cl::list +MacroNames("D", cl::desc("Name of the macro to be defined"), + cl::value_desc("macro name"), cl::Prefix); + static int reportError(const char *ProgName, Twine Msg) { errs() << ProgName << ": " << Msg; errs().flush(); @@ -91,7 +95,7 @@ // it later. SrcMgr.setIncludeDirs(IncludeDirs); - TGParser Parser(SrcMgr, Records); + TGParser Parser(SrcMgr, MacroNames, Records); if (Parser.ParseFile()) return 1; Index: lib/TableGen/TGLexer.h =================================================================== --- lib/TableGen/TGLexer.h +++ lib/TableGen/TGLexer.h @@ -14,11 +14,14 @@ #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H #define LLVM_LIB_TABLEGEN_TGLEXER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/SMLoc.h" #include #include +#include #include namespace llvm { @@ -59,7 +62,11 @@ BinaryIntVal, // String valued tokens. - Id, StrVal, VarName, CodeFragment + Id, StrVal, VarName, CodeFragment, + + // Preprocessing tokens for internal usage by the lexer. + // They are never returned as a result of Lex(). 
+ Ifdef, Else, Endif, Define }; } @@ -87,10 +94,10 @@ DependenciesMapTy Dependencies; public: - TGLexer(SourceMgr &SrcMgr); + TGLexer(SourceMgr &SrcMgr, ArrayRef Macros); tgtok::TokKind Lex() { - return CurCode = LexToken(); + return CurCode = LexToken(CurPtr == CurBuf.begin()); } const DependenciesMapTy &getDependencies() const { @@ -119,12 +126,12 @@ private: /// LexToken - Read the next token and return its code. - tgtok::TokKind LexToken(); + tgtok::TokKind LexToken(bool FileOrLineStart = false); tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg); - int getNextChar(); - int peekNextChar(int Index); + int getNextChar(bool *LeftPreviousFile = nullptr); + int peekNextChar(int Index) const; void SkipBCPLComment(); bool SkipCComment(); tgtok::TokKind LexIdentifier(); @@ -134,6 +141,223 @@ tgtok::TokKind LexNumber(); tgtok::TokKind LexBracket(); tgtok::TokKind LexExclaim(); + + // *** Structures and methods for preprocessing support *** + + // A set of macro names that are defined either via command line or + // by using: + // #define NAME + StringSet<> DefinedMacros; + + // Each of #ifdef and #else directives has a descriptor associated + // with it. + struct PreprocessorControlDesc { + // Either tgtok::Ifdef or tgtok::Else. + tgtok::TokKind Kind; + + // True, if the condition for this directive is true, false - otherwise. + // Examples: + // #ifdef NAME : true, if NAME is defined, false - otherwise. + // ... + // #else : false, if NAME is defined, true - otherwise. + bool IsDefined; + + // Pointer into CurBuf to the beginning of the preprocessing directive + // word, e.g.: + // #ifdef NAME + // ^ - SrcPos + const char *SrcPos; + }; + + // An ordered list of preprocessing controls defined by #ifdef/#else + // directives that are in effect currently. This is actually a stack, + // but we use vector container to traverse it in the reverse order + // in prepIsProcessingEnabled(). + // + // For each #ifdef we add an element to the control stack. 
+ // For each #else we replace the top element with a descriptor + // with an inverted IsDefined value. + // For each #endif we pop the top element from the control stack. + // + // When CurPtr reaches the current buffer's end, the control stack + // must be empty, i.e. #ifdef and the corresponding #endif + // must be located in the same file. + std::unique_ptr > PreprocessorControl; + + // + // We want to disallow code like this: + // file1.td: + // #define NAME + // #ifdef NAME + // include "file2.td" + // EOF + // file2.td: + // #endif + // EOF + // + // To do this, we clear the preprocessing control stack on entry + // to each of the included file. PrepIncludeStack is used to store + // preprocessing control stacks for the parent files. + std::vector > > + PrepIncludeStack; + + // Allocate new stack of preprocessing controls. Updates PreprocessorControl + // to point to the new stack. + void prepNewPreprocessorControl(); + + // Push the current preprocessor control stack to the include stack, + // and set PreprocessorControl to point to a newly allocated preprocessor + // control stack. + void prepEnterInclude(); + + // Set PreprocessorControl to point to the preprocessor control + // stack on the top of the include stack. Pop the include stack. + void prepExitInclude(); + + // Look ahead for a preprocessing directive starting from CurPtr. The caller + // must only call this method, if *(CurPtr - 1) is '#'. If the method matches + // a preprocessing directive word followed by a whitespace, then it returns + // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define. + // + // CurPtr is not adjusted by this method. + tgtok::TokKind prepIsDirective() const; + + // Given a preprocessing token kind, adjusts CurPtr to the end + // of the preprocessing directive word. Returns true, unless + // an unsupported token kind is passed in. 
+ // + // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective() + // to avoid adjusting CurPtr before we are sure that '#' is followed + // by a preprocessing directive. If it is not, then we fall back to + // tgtok::paste interpretation of '#'. + bool prepEatPreprocessorDirective(tgtok::TokKind Kind); + + // The main "exit" point from the token parsing to preprocessor. + // + // The method is called for CurPtr, when prepIsDirective() returns + // true. The first parameter matches the result of prepIsDirective(), + // denoting the actual preprocessor directive to be processed. + // + // If the preprocessing directive disables the tokens processing, e.g.: + // #ifdef NAME // NAME is undefined + // then lexPreprocessor() enters the lines-skipping mode. + // In this mode, it does not parse any tokens, because the code under + // the #ifdef may not even be a correct tablegen code. The preprocessor + // looks for lines containing other preprocessing directives, which + // may be prepended with whitespaces and C-style comments. If the line + // does not contain a preprocessing directive, it is skipped completely. + // Otherwise, the preprocessing directive is processed by recursively + // calling lexPreprocessor(). The processing of the encountered + // preprocessing directives includes updating preprocessing control stack + // and adding new macros into DefinedMacros set. + // + // The second parameter controls whether lexPreprocessor() is called from + // LexToken() (true) or recursively from lexPreprocessor() (false). + // + // If ReturnNextLiveToken is true, the method returns the next + // LEX token following the current directive or following the end + // of the disabled preprocessing region corresponding to this directive. + // If ReturnNextLiveToken is false, the method returns the first parameter, + // unless there were errors encountered in the disabled preprocessing + // region - in this case, it returns tgtok::Error. 
+ tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind, + bool ReturnNextLiveToken = true); + + // Worker method for lexPreprocessor() to skip lines after some + // preprocessing directive up to the buffer end or to the directive + // that re-enables token processing. The method returns true + // upon processing the next directive that re-enables tokens + // processing. False is returned if an error was encountered. + // + // Note that prepSkipRegion() calls lexPreprocessor() to process + // encountered preprocessing directives. In this case, the second + // parameter to lexPreprocessor() is set to false. Being passed + // false ReturnNextLiveToken, lexPreprocessor() must never call + // prepSkipRegion(). We assert this by passing ReturnNextLiveToken + // to prepSkipRegion() and checking that it is never set to false. + bool prepSkipRegion(bool MustNeverBeFalse); + + // Lex name of the macro after either #ifdef or #define. We could have used + // LexIdentifier(), but it has special handling of "include" word, which + // could result in awkward diagnostic errors. Consider: + // ---- + // #ifdef include + // class ... + // ---- + // LexIdentifier() will engage LexInclude(), which will complain about + // missing file with name "class". Instead, prepLexMacroName() will treat + // "include" as a normal macro name. + // + // On entry, CurPtr points to the end of a preprocessing directive word. + // The method allows for whitespaces between the preprocessing directive + // and the macro name. The allowed whitespaces are ' ' and '\t'. + // + // If the first non-whitespace symbol after the preprocessing directive + // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then + // the method updates TokStart to the position of the first non-whitespace + // symbol, sets CurPtr to the position of the macro name's last symbol, + // and returns a string reference to the macro name. 
Otherwise, + // TokStart is set to the first non-whitespace symbol after the preprocessing + // directive, and the method returns an empty string reference. + // + // In all cases, TokStart may be used to point to the word following + // the preprocessing directive. + StringRef prepLexMacroName(); + + // Skip any whitespaces starting from CurPtr. The method is used + // only in the lines-skipping mode to find the first non-whitespace + // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n' + // and '\r'. The method skips C-style comments as well, because + // it is used to find the beginning of the preprocessing directive. + // If we do not handle C-style comments the following code would + // result in incorrect detection of a preprocessing directive: + // /* + // #ifdef NAME + // */ + // As long as we skip C-style comments, the following code is correctly + // recognized as a preprocessing directive: + // /* first line comment + // second line comment */ #ifdef NAME + // + // The method returns true upon reaching the first non-whitespace symbol + // or EOF, CurPtr is set to point to this symbol. The method returns false, + // if an error occurred during skipping of a C-style comment. + bool prepSkipLineBegin(); + + // Skip any whitespaces or comments after a preprocessing directive. + // The method returns true upon reaching either end of the line + // or end of the file. If there is a multiline C-style comment + // after the preprocessing directive, the method skips + // the comment, so the final CurPtr may point to one of the next lines. + // The method returns false, if an error occurred during skipping + // C- or C++-style comment, or a non-whitespace symbol appears + // after the preprocessing directive. + // + // The method may be called both during lines-skipping and tokens + // processing. It actually verifies that only whitespaces or/and + // comments follow a preprocessing directive. 
+ // + // After the execution of this method, CurPtr points either to new line + // symbol, buffer end or non-whitespace symbol following the preprocessing + // directive. + bool prepSkipDirectiveEnd(); + + // Skip all symbols to the end of the line/file. + // The method adjusts CurPtr, so that it points to either new line + // symbol in the current line or the buffer end. + void prepSkipToLineEnd(); + + // Return true, if the current preprocessor control stack is such that + // we should allow lexer to process the next token, false - otherwise. + // + // In particular, the method returns true, if all the #ifdef/#else + // controls on the stack have their IsDefined member set to true. + bool prepIsProcessingEnabled(); + + // Report an error, if we reach EOF with non-empty preprocessing control + // stack. This means there is no matching #endif for the previous + // #ifdef/#else. + void prepReportPreprocessorStackError(); }; } // end namespace llvm Index: lib/TableGen/TGLexer.cpp =================================================================== --- lib/TableGen/TGLexer.cpp +++ lib/TableGen/TGLexer.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/TableGen/Error.h" +#include #include #include #include @@ -28,11 +29,32 @@ using namespace llvm; -TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) { +namespace { +// A list of supported preprocessing directives with their +// internal token kinds and names. 
+struct { + tgtok::TokKind Kind; + const char *Word; +} PreprocessorDirs[] = { + { tgtok::Ifdef, "ifdef" }, + { tgtok::Else, "else" }, + { tgtok::Endif, "endif" }, + { tgtok::Define, "define" } +}; +} // end anonymous namespace + +TGLexer::TGLexer(SourceMgr &SM, ArrayRef Macros) : SrcMgr(SM) { CurBuffer = SrcMgr.getMainFileID(); CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); CurPtr = CurBuf.begin(); TokStart = nullptr; + + prepNewPreprocessorControl(); + // Put all macros defined in the command line into the DefinedMacros set. + std::for_each(Macros.begin(), Macros.end(), + [this](const std::string &MacroName) { + DefinedMacros.insert(MacroName); + }); } SMLoc TGLexer::getLoc() const { @@ -46,7 +68,10 @@ return tgtok::Error; } -int TGLexer::getNextChar() { +int TGLexer::getNextChar(bool *LeftPreviousFile) { + if (LeftPreviousFile) + *LeftPreviousFile = false; + char CurChar = *CurPtr++; switch (CurChar) { default: @@ -59,11 +84,49 @@ // If this is the end of an included file, pop the parent file off the // include stack. + // + // This code allows cross-file constructs, such as: + // file1.td + // class + // EOF + // file2.td + // include "file1.td" + // ClassName; + // EOF + // AND + // file1.td + // /* + // EOF + // file2.td + // include "file1.td" + // */ + // EOF + // AND + // file1.td + // class ClassName { + // list Strings = ["a" + // EOF + // file2.td + // include "file1.td" + // , "b"]; } + // EOF + + // We could have used LeftPreviousFile output to disallow that. SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); if (ParentIncludeLoc != SMLoc()) { + prepExitInclude(); + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); CurPtr = ParentIncludeLoc.getPointer(); + // Make sure TokStart points into the parent file's buffer. + // LexToken() assigns to it before calling getNextChar(), + // so it is pointing into the included file now. 
+ TokStart = CurPtr; + if (LeftPreviousFile) + *LeftPreviousFile = true; + + // Do not pass LeftPreviousFile here to avoid resetting it to false. return getNextChar(); } @@ -83,14 +146,22 @@ } } -int TGLexer::peekNextChar(int Index) { +int TGLexer::peekNextChar(int Index) const { return *(CurPtr + Index); } -tgtok::TokKind TGLexer::LexToken() { +tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { TokStart = CurPtr; // This always consumes at least one character. - int CurChar = getNextChar(); + bool LeftPreviousFile; + int CurChar = getNextChar(&LeftPreviousFile); + + // If we left the file, for which this invocation of LexToken() was done, + // we are at the end of an include directive (note that we could have + // left several files inside getNextChar()). So we cannot be at the beginning + // of the line/file. + if (LeftPreviousFile) + FileOrLineStart = false; switch (CurChar) { default: @@ -100,7 +171,20 @@ // Unknown character, emit an error. return ReturnError(TokStart, "Unexpected character"); - case EOF: return tgtok::Eof; + case EOF: + // Report an error, if the preprocessor control stack is not empty. + if (!PreprocessorControl->empty()) { + prepReportPreprocessorStackError(); + return tgtok::Error; + } + // Report an error, if the preprocessor include stack is not empty. + // EOF means that we are at the end of all buffers. 
+ if (!PrepIncludeStack.empty()) { + PrintFatalError("Preprocessor include stack is not empty"); + return tgtok::Error; + } + return tgtok::Eof; + case ':': return tgtok::colon; case ';': return tgtok::semi; case '.': return tgtok::period; @@ -114,15 +198,27 @@ case ')': return tgtok::r_paren; case '=': return tgtok::equal; case '?': return tgtok::question; - case '#': return tgtok::paste; + case '#': + if (FileOrLineStart) { + tgtok::TokKind Kind = prepIsDirective(); + if (Kind != tgtok::Error) + return lexPreprocessor(Kind); + } + + return tgtok::paste; + + case '\r': + PrintFatalError("getNextChar() must never return '\r'"); + return tgtok::Error; case 0: case ' ': case '\t': - case '\n': - case '\r': // Ignore whitespace. - return LexToken(); + return LexToken(FileOrLineStart); + case '\n': + // Ignore whitespace, and identify the new line. + return LexToken(true); case '/': // If this is the start of a // comment, skip until the end of the line or // the end of the buffer. @@ -133,7 +229,7 @@ return tgtok::Error; } else // Otherwise, this is an error. return ReturnError(TokStart, "Unexpected character"); - return LexToken(); + return LexToken(FileOrLineStart); case '-': case '+': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { @@ -249,10 +345,10 @@ } tgtok::TokKind TGLexer::LexIdentifier() { - // The first letter is [a-zA-Z_#]. + // The first letter is [a-zA-Z_]. const char *IdentStart = TokStart; - // Match the rest of the identifier regex: [0-9a-zA-Z_#]* + // Match the rest of the identifier regex: [0-9a-zA-Z_]* while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') ++CurPtr; @@ -322,6 +418,8 @@ // Save the line number and lex buffer of the includer. 
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); CurPtr = CurBuf.begin(); + + prepEnterInclude(); return false; } @@ -350,6 +448,13 @@ unsigned CommentDepth = 1; while (true) { + // NOTE: usage of getNextChar() actually allows cross-file comments, e.g. + // file1.td: + // /* Comment EOF + // file2.td: + // include "file1.td" */ + // + // There is no verification to disallow this. int CurChar = getNextChar(); switch (CurChar) { case EOF: @@ -496,3 +601,449 @@ return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); } + +void TGLexer::prepNewPreprocessorControl() { + PreprocessorControl = + make_unique >(); +} + +void TGLexer::prepEnterInclude() { + PrepIncludeStack.push_back(std::move(PreprocessorControl)); + prepNewPreprocessorControl(); +} + +void TGLexer::prepExitInclude() { + // Report an error, if preprocessor control stack for the current + // file is not empty. + if (!PreprocessorControl->empty()) { + prepReportPreprocessorStackError(); + + // There is no good way to stop processing more tokens and avoid + // more problems related to invalid preprocessor control stack. + // Note that the next call to getNextChar() (which is the caller + // of prepExitInclude()) may exit more included files, and on each + // exit we will print an error. To avoid this, just exit with fatal error. + PrintFatalError("Lexing stopped"); + } + + // Pop the preprocessing controls from the include stack. 
+ if (PrepIncludeStack.empty()) { + PrintFatalError("Preprocessor include stack is empty"); + } + + PreprocessorControl = std::move(PrepIncludeStack.back()); + PrepIncludeStack.pop_back(); +} + +tgtok::TokKind TGLexer::prepIsDirective() const { + for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) { + int NextChar = *CurPtr; + bool Match = true; + unsigned I = 0; + for (; I < strlen(PreprocessorDirs[ID].Word); ++I) { + if (NextChar != PreprocessorDirs[ID].Word[I]) { + Match = false; + break; + } + + NextChar = peekNextChar(I + 1); + } + + // Check for whitespace after the directive. If there is no whitespace, + // then we do not recognize it as a preprocessing directive. + if (Match) { + tgtok::TokKind Kind = PreprocessorDirs[ID].Kind; + + // New line and EOF may follow only #else/#endif. It will be reported + // as an error for #ifdef/#define after the call to prepLexMacroName(). + if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF || + NextChar == '\n' || + // It looks like TableGen does not support '\r' as the actual + // carriage return, e.g. getNextChar() treats a single '\r' + // as '\n'. So we do the same here. + NextChar == '\r') + return Kind; + + // Allow comments after some directives, e.g.: + // #else// OR #else/**/ + // #endif// OR #endif/**/ + // + // Note that we do allow comments after #ifdef/#define here, e.g. + // #ifdef/**/ AND #ifdef// + // #define/**/ AND #define// + // + // These cases will be reported as incorrect after calling + // prepLexMacroName(). We could have supported C-style comments + // after #ifdef/#define, but this would complicate the code + // for little benefit. + if (NextChar == '/') { + NextChar = peekNextChar(I + 1); + + if (NextChar == '*' || NextChar == '/') + return Kind; + + // Pretend that we do not recognize the directive. 
+ } + } + } + + return tgtok::Error; +} + +bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { + TokStart = CurPtr; + + for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) + if (PreprocessorDirs[ID].Kind == Kind) { + // Advance CurPtr to the end of the preprocessing word. + CurPtr += strlen(PreprocessorDirs[ID].Word); + return true; + } + + PrintFatalError("Unsupported preprocessing token in " + "prepEatPreprocessorDirective()"); + return false; +} + +tgtok::TokKind TGLexer::lexPreprocessor( + tgtok::TokKind Kind, bool ReturnNextLiveToken) { + + // We must be looking at a preprocessing directive. Eat it! + if (!prepEatPreprocessorDirective(Kind)) + PrintFatalError("lexPreprocessor() called for unknown " + "preprocessor directive"); + + if (Kind == tgtok::Ifdef) { + StringRef MacroName = prepLexMacroName(); + if (MacroName.empty()) + return ReturnError(TokStart, "Expected macro name after #ifdef"); + + bool MacroIsDefined = DefinedMacros.count(MacroName) != 0; + + // Regardless of whether we are processing tokens or not, + // we put the #ifdef control on stack. + PreprocessorControl->push_back({Kind, MacroIsDefined, TokStart}); + + if (!prepSkipDirectiveEnd()) + return ReturnError(CurPtr, + "Only comments are supported after #ifdef NAME"); + + // If we were not processing tokens before this #ifdef, + // then just return back to the lines skipping code. + if (!ReturnNextLiveToken) + return Kind; + + // If we were processing tokens before this #ifdef, + // and the macro is defined, then just return the next token. + if (MacroIsDefined) + return LexToken(); + + // We were processing tokens before this #ifdef, and the macro + // is not defined, so we have to start skipping the lines. + // If the skipping is successful, it will return the token following + // either #else or #endif corresponding to this #ifdef. 
+ if (prepSkipRegion(ReturnNextLiveToken)) + return LexToken(); + + return tgtok::Error; + } else if (Kind == tgtok::Else) { + // Check if this #else is correct before calling prepSkipDirectiveEnd(), + // which will move CurPtr away from the beginning of #else. + if (PreprocessorControl->empty()) + return ReturnError(TokStart, "#else without #ifdef"); + + auto &IfdefEntry = PreprocessorControl->back(); + + if (IfdefEntry.Kind != tgtok::Ifdef) { + PrintError(TokStart, "double #else"); + return ReturnError(IfdefEntry.SrcPos, "Previous #else is here"); + } + + // Replace the corresponding #ifdef's control with its negation + // on the control stack. + PreprocessorControl->pop_back(); + PreprocessorControl->push_back({Kind, !IfdefEntry.IsDefined, TokStart}); + + if (!prepSkipDirectiveEnd()) + return ReturnError(CurPtr, "Only comments are supported after #else"); + + // If we were processing tokens before this #else, + // we have to start skipping lines until the matching #endif. + if (ReturnNextLiveToken) { + if (prepSkipRegion(ReturnNextLiveToken)) + return LexToken(); + + return tgtok::Error; + } + + // Return to the lines skipping code. + return Kind; + } else if (Kind == tgtok::Endif) { + // Check if this #endif is correct before calling prepSkipDirectiveEnd(), + // which will move CurPtr away from the beginning of #endif. + if (PreprocessorControl->empty()) + return ReturnError(TokStart, "#endif without #ifdef"); + + auto &IfdefOrElseEntry = PreprocessorControl->back(); + + if (IfdefOrElseEntry.Kind != tgtok::Ifdef && + IfdefOrElseEntry.Kind != tgtok::Else) { + PrintFatalError("Invalid preprocessor control on the stack"); + return tgtok::Error; + } + + if (!prepSkipDirectiveEnd()) + return ReturnError(CurPtr, "Only comments are supported after #endif"); + + PreprocessorControl->pop_back(); + + // If we were processing tokens before this #endif, then + // we should continue it. 
+ if (ReturnNextLiveToken) { + return LexToken(); + } + + // Return to the lines skipping code. + return Kind; + } else if (Kind == tgtok::Define) { + StringRef MacroName = prepLexMacroName(); + if (MacroName.empty()) + return ReturnError(TokStart, "Expected macro name after #define"); + + if (!DefinedMacros.insert(MacroName).second) + PrintWarning(getLoc(), + "Duplicate definition of macro: " + Twine(MacroName)); + + if (!prepSkipDirectiveEnd()) + return ReturnError(CurPtr, + "Only comments are supported after #define NAME"); + + if (!ReturnNextLiveToken) { + PrintFatalError("#define must be ignored during the lines skipping"); + return tgtok::Error; + } + + return LexToken(); + } + + PrintFatalError("Preprocessing directive is not supported"); + return tgtok::Error; +} + +bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { + if (!MustNeverBeFalse) + PrintFatalError("Invalid recursion."); + + do { + // Skip all symbols to the line end. + prepSkipToLineEnd(); + + // Find the first non-whitespace symbol in the next line(s). + if (!prepSkipLineBegin()) + return false; + + // If the first non-blank/comment symbol on the line is '#', + // it may be a start of preprocessing directive. + // + // If it is not '#' just go to the next line. + if (*CurPtr == '#') + ++CurPtr; + else + continue; + + tgtok::TokKind Kind = prepIsDirective(); + + // If we did not find a preprocessing directive or it is #define, + // then just skip to the next line. We do not have to do anything + // for #define in the line-skipping mode. + if (Kind == tgtok::Error || Kind == tgtok::Define) + continue; + + tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false); + + // If lexPreprocessor() encountered an error during lexing this + // preprocessor idiom, then return false to the calling lexPreprocessor(). + // This will force tgtok::Error to be returned to the tokens processing. 
+    if (ProcessedKind == tgtok::Error)
+      return false;
+
+    if (Kind != ProcessedKind)
+      PrintFatalError("prepIsDirective() and lexPreprocessor() "
+                      "returned different token kinds");
+
+    // If this preprocessing directive enables tokens processing,
+    // then return to the lexPreprocessor() and get to the next token.
+    // We can move from line-skipping mode to processing tokens only
+    // due to #else or #endif.
+    if (prepIsProcessingEnabled()) {
+      if (Kind != tgtok::Else && Kind != tgtok::Endif) {
+        PrintFatalError("Tokens processing was enabled by an unexpected "
+                        "preprocessing directive");
+        return false;
+      }
+
+      return true;
+    }
+  } while (CurPtr != CurBuf.end());
+
+  // We have reached the end of the file, but never left the lines-skipping
+  // mode. This means there is no matching #endif.
+  prepReportPreprocessorStackError();
+  return false;
+}
+
+/// Lex the macro name that follows a preprocessing directive.
+///
+/// Leading spaces and tabs are skipped, TokStart is set to the first
+/// remaining character, and CurPtr is advanced past the longest match of
+/// [a-zA-Z_][0-9a-zA-Z_]*.
+///
+/// \returns the matched name, or an empty string (with CurPtr left equal to
+/// TokStart) if the first character cannot start a macro name.
+StringRef TGLexer::prepLexMacroName() {
+  // Skip whitespaces between the preprocessing directive and the macro name.
+  while (*CurPtr == ' ' || *CurPtr == '\t')
+    ++CurPtr;
+
+  TokStart = CurPtr;
+  // Macro names start with [a-zA-Z_].
+  // The <cctype> classifiers have undefined behavior for negative arguments
+  // other than EOF, so convert the (possibly signed) char to unsigned char
+  // before classifying it.
+  if (*CurPtr != '_' && !isalpha(static_cast<unsigned char>(*CurPtr)))
+    return "";
+
+  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
+  while (isalpha(static_cast<unsigned char>(*CurPtr)) ||
+         isdigit(static_cast<unsigned char>(*CurPtr)) || *CurPtr == '_')
+    ++CurPtr;
+
+  return StringRef(TokStart, CurPtr - TokStart);
+}
+
+/// Skip everything that may legally precede a preprocessing directive on
+/// a line: spaces, tabs, line breaks and C-style comments.
+///
+/// On return CurPtr points to the first character that is none of the
+/// above (including a '/' that does not open a C-style comment), or to the
+/// end of the buffer.
+///
+/// \returns false only when SkipCComment() returns true (treated here as
+/// a failure to lex the comment); true otherwise.
+bool TGLexer::prepSkipLineBegin() {
+  while (CurPtr != CurBuf.end()) {
+    switch (*CurPtr) {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      break;
+
+    case '/': {
+      int NextChar = peekNextChar(1);
+      if (NextChar == '*') {
+        // Skip C-style comment.
+        // Note that we do not care about skipping the C++-style comments.
+        // If the line contains "//", it may not contain any processable
+        // preprocessing directive.  Just return CurPtr pointing to
+        // the first '/' in this case.  We also do not care about
+        // incorrect symbols after the first '/' - we are in lines-skipping
+        // mode, so incorrect code is allowed to some extent.
+
+        // Set TokStart to the beginning of the comment to enable proper
+        // diagnostic printing in case of error in SkipCComment().
+        TokStart = CurPtr;
+
+        // CurPtr must point to '*' before call to SkipCComment().
+        ++CurPtr;
+        if (SkipCComment())
+          return false;
+      } else {
+        // CurPtr points to the non-whitespace '/'.
+        return true;
+      }
+
+      // We must not increment CurPtr after the comment was lexed.
+      continue;
+    }
+
+    default:
+      return true;
+    }
+
+    ++CurPtr;
+  }
+
+  // We have reached the end of the file.  Return to the lines skipping
+  // code, and allow it to handle the EOF as needed.
+  return true;
+}
+
+/// Consume what is allowed to follow a preprocessing directive on its line:
+/// spaces, tabs, C-style comments and a "//" comment.
+///
+/// \returns true when a line break (left unconsumed) or the end of the buffer
+/// is reached.  Returns false when any other character is found (TokStart is
+/// set to it; an error is printed here only for a '/' that does not start
+/// a comment) or when SkipCComment() returns true.
+bool TGLexer::prepSkipDirectiveEnd() {
+  while (CurPtr != CurBuf.end()) {
+    switch (*CurPtr) {
+    case ' ':
+    case '\t':
+      break;
+
+    case '\n':
+    case '\r':
+      return true;
+
+    case '/': {
+      int NextChar = peekNextChar(1);
+      if (NextChar == '/') {
+        // Skip C++-style comment.
+        // We may just return true now, but let's skip to the line/buffer end
+        // to simplify the method specification.
+        ++CurPtr;
+        SkipBCPLComment();
+      } else if (NextChar == '*') {
+        // When we are skipping C-style comment at the end of a preprocessing
+        // directive, we can skip several lines.  If any meaningful TD token
+        // follows the end of the C-style comment on the same line, it will
+        // be considered as an invalid usage of TD token.
+        // For example, we want to forbid usages like this one:
+        //     #define MACRO class Class {}
+        // But with C-style comments we also disallow the following:
+        //     #define MACRO /* This macro is used
+        //                      to ... */ class Class {}
+        // One can argue that this should be allowed, but it does not seem
+        // to be worth the complication.  Moreover, this matches
+        // the C preprocessor behavior.
+
+        // Set TokStart to the beginning of the comment to enable proper
+        // diagnostic printing in case of error in SkipCComment().
+        TokStart = CurPtr;
+        ++CurPtr;
+        if (SkipCComment())
+          return false;
+      } else {
+        TokStart = CurPtr;
+        PrintError(CurPtr, "Unexpected character");
+        return false;
+      }
+
+      // We must not increment CurPtr after the comment was lexed.
+      continue;
+    }
+
+    default:
+      // Do not allow any non-whitespaces after the directive.
+      TokStart = CurPtr;
+      return false;
+    }
+
+    ++CurPtr;
+  }
+
+  return true;
+}
+
+/// Advance CurPtr up to (but not past) the next '\n' or '\r', or to the end
+/// of the buffer if the current line is the last one.
+void TGLexer::prepSkipToLineEnd() {
+  // Check the buffer bound before dereferencing CurPtr.
+  while (CurPtr != CurBuf.end() && *CurPtr != '\n' && *CurPtr != '\r')
+    ++CurPtr;
+}
+
+/// \returns true if every entry of the preprocessor control stack has
+/// IsDefined set (which includes the case of an empty stack), and false
+/// as soon as one entry without it is found.
+bool TGLexer::prepIsProcessingEnabled() {
+  for (auto I = PreprocessorControl->rbegin(), E = PreprocessorControl->rend();
+       I != E; ++I) {
+    if (!I->IsDefined)
+      return false;
+  }
+
+  return true;
+}
+
+/// Report that the end of the buffer was reached while a preprocessing
+/// region was still open.  Two errors are printed: one at the end of the
+/// buffer and one at the SrcPos of the most recent control stack entry.
+/// TokStart is then set to CurPtr.  Must not be called with an empty
+/// control stack.
+void TGLexer::prepReportPreprocessorStackError() {
+  if (PreprocessorControl->empty()) {
+    PrintFatalError("prepReportPreprocessorStackError() called with "
+                    "empty control stack");
+    return;
+  }
+
+  auto &PrepControl = PreprocessorControl->back();
+  PrintError(CurBuf.end(), "Reached EOF without matching #endif");
+  PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
+
+  TokStart = CurPtr;
+}
Index: lib/TableGen/TGParser.h
===================================================================
--- lib/TableGen/TGParser.h
+++ lib/TableGen/TGParser.h
@@ -115,8 +115,9 @@
   };
 
 public:
-  TGParser(SourceMgr &SrcMgr, RecordKeeper &records)
-    : Lex(SrcMgr), CurMultiClass(nullptr), Records(records) {}
+  TGParser(SourceMgr &SrcMgr, ArrayRef<std::string> Macros,
+           RecordKeeper &records)
+    : Lex(SrcMgr, Macros), CurMultiClass(nullptr), Records(records) {}
 
   /// ParseFile - Main entrypoint for parsing a tblgen file.  These parser
   /// routines return true on error, or false on success.
Index: test/TableGen/prep-diag1.td =================================================================== --- test/TableGen/prep-diag1.td +++ test/TableGen/prep-diag1.td @@ -0,0 +1,28 @@ +// RUN: not llvm-tblgen -DDIAG1 -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG1 %s + +#ifdef DIAG1 +// DIAG1: error: Only comments are supported after #define NAME +#define ENABLED1/* +*/class C; +#endif // DIAG1 + +// RUN: not llvm-tblgen -DDIAG4 -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG4 %s + +#ifdef DIAG4 +// DIAG4: warning: Duplicate definition of macro: ENABLED1 +#define ENABLED1 +#define ENABLED1 +#endif // DIAG4 + +// RUN: not llvm-tblgen -DDIAG2 -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG3 %s + +#ifdef DIAG2 +// DIAG2: error: Only comments are supported after #ifdef NAME + +// Invalid #ifdef below should be detected even if DIAG2 is not defined. +// DIAG3: error: Only comments are supported after #ifdef NAME +#ifdef DIAG2/* +*/class C; +#endif +#endif // DIAG2 Index: test/TableGen/prep-diag10.td =================================================================== --- test/TableGen/prep-diag10.td +++ test/TableGen/prep-diag10.td @@ -0,0 +1,6 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Reached EOF without matching #endif +// CHECK: error: The latest preprocessor control is here +#ifdef DISABLED +#else Index: test/TableGen/prep-diag11-include.td =================================================================== --- test/TableGen/prep-diag11-include.td +++ test/TableGen/prep-diag11-include.td @@ -0,0 +1,2 @@ +// RUN: echo +#ifdef ENABLED Index: test/TableGen/prep-diag11.td =================================================================== --- test/TableGen/prep-diag11.td +++ test/TableGen/prep-diag11.td @@ -0,0 +1,9 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Reached EOF without matching #endif +// CHECK: error: The latest 
preprocessor control is here +// CHECK: error: Lexing stopped +#ifdef DISABLED +#else +#define ENABLED +include "prep-diag11-include.td" Index: test/TableGen/prep-diag12-include.td =================================================================== --- test/TableGen/prep-diag12-include.td +++ test/TableGen/prep-diag12-include.td @@ -0,0 +1,4 @@ +// RUN: echo + +#ifdef ENABLED +#else Index: test/TableGen/prep-diag12.td =================================================================== --- test/TableGen/prep-diag12.td +++ test/TableGen/prep-diag12.td @@ -0,0 +1,8 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Reached EOF without matching #endif +// CHECK: error: The latest preprocessor control is here +#ifdef DISABLED +#else +#define ENABLED +include "prep-diag12-include.td" Index: test/TableGen/prep-diag13.td =================================================================== --- test/TableGen/prep-diag13.td +++ test/TableGen/prep-diag13.td @@ -0,0 +1,9 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Reached EOF without matching #endif +// CHECK: error: The latest preprocessor control is here +#ifdef DISABLED +/* +#else +#endif +*/ Index: test/TableGen/prep-diag14.td =================================================================== --- test/TableGen/prep-diag14.td +++ test/TableGen/prep-diag14.td @@ -0,0 +1,6 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Reached EOF without matching #endif +// CHECK: error: The latest preprocessor control is here +#ifdef DISABLED +// #endif Index: test/TableGen/prep-diag2.td =================================================================== --- test/TableGen/prep-diag2.td +++ test/TableGen/prep-diag2.td @@ -0,0 +1,14 @@ +// RUN: not llvm-tblgen -DDIAG1 -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG1 %s +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s + +#ifdef DIAG1 +// DIAG1: error: Only comments are supported 
after #else + +// Invalid #else below should be detected even if DIAG1 is not defined. +// DIAG2: error: Only comments are supported after #else +#ifdef DIAG2//DIAG2 +#else/* +*/class C; +#endif +#endif // DIAG1 + Index: test/TableGen/prep-diag3.td =================================================================== --- test/TableGen/prep-diag3.td +++ test/TableGen/prep-diag3.td @@ -0,0 +1,14 @@ +// RUN: not llvm-tblgen -DDIAG1 -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG1 %s +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s + +#ifdef DIAG1 +// DIAG1: error: Only comments are supported after #endif + +// Invalid #endif below should be detected even if DIAG1 is not defined. +// DIAG2: error: Only comments are supported after #endif +#ifdef DIAG2//DIAG2 +#else/*!DIAG2*/ +#endif/* !DIAG2 +*/class C; +#endif // DIAG1 + Index: test/TableGen/prep-diag4.td =================================================================== --- test/TableGen/prep-diag4.td +++ test/TableGen/prep-diag4.td @@ -0,0 +1,8 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: double #else +// CHECK: error: Previous #else is here +#ifdef DIAG1 +#else +#else +#endif Index: test/TableGen/prep-diag5.td =================================================================== --- test/TableGen/prep-diag5.td +++ test/TableGen/prep-diag5.td @@ -0,0 +1,6 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: #else without #ifdef +#else +#else +#endif Index: test/TableGen/prep-diag6.td =================================================================== --- test/TableGen/prep-diag6.td +++ test/TableGen/prep-diag6.td @@ -0,0 +1,7 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Expected macro name after #ifdef +#ifdef +#else +#else +#endif Index: test/TableGen/prep-diag7.td =================================================================== --- test/TableGen/prep-diag7.td +++ test/TableGen/prep-diag7.td @@ -0,0
+1,4 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: #endif without #ifdef +#endif Index: test/TableGen/prep-diag8.td =================================================================== --- test/TableGen/prep-diag8.td +++ test/TableGen/prep-diag8.td @@ -0,0 +1,5 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Expected macro name after #define +#define +#endif Index: test/TableGen/prep-diag9.td =================================================================== --- test/TableGen/prep-diag9.td +++ test/TableGen/prep-diag9.td @@ -0,0 +1,5 @@ +// RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s + +// CHECK: error: Reached EOF without matching #endif +// CHECK: error: The latest preprocessor control is here +#ifdef DISABLED Index: test/TableGen/prep-region-include.td =================================================================== --- test/TableGen/prep-region-include.td +++ test/TableGen/prep-region-include.td @@ -0,0 +1,9 @@ +// RUN: echo +#ifdef ENABLED4 +def ifdef_enabled4 : C; +#else +def ifdef_enabled4_else : C; +#endif + +// EOF immediately after ENABLED5 +#define ENABLED5 Index: test/TableGen/prep-region-processing.td =================================================================== --- test/TableGen/prep-region-processing.td +++ test/TableGen/prep-region-processing.td @@ -0,0 +1,150 @@ +// RUN: llvm-tblgen -I %p %s 2>&1 | FileCheck %s --implicit-check-not warning: + +class C; + +// TableGen prints records in alphabetical order.
+// CHECK-NOT: def ifdef_disabled1 +// CHECK-NOT: def ifdef_disabled2 +// CHECK: def ifdef_disabled3 +// CHECK-NOT: def ifdef_disabled4 +// CHECK-NOT: def ifdef_disabled5 +// CHECK: def ifdef_disabled4_else +// CHECK-NOT: def ifdef_disabled5_else +// CHECK: def ifdef_enabled1 +// CHECK-NOT: def ifdef_enabled2 +// CHECK: def ifdef_enabled3 +// CHECK: def ifdef_enabled4 +// CHECK-NOT: def ifdef_enabled4_else +// CHECK: def ifdef_enabled5 +// CHECK: def ifdef_enabled6 +// CHECK-NOT: def ifdef_enabled6_else +// CHECK-NOT: def ifdef_disabled6 +// CHECK-NOT: def ifdef_disabled6_else + +#define ENABLED1 +#define ENABLED2 + +#ifdef DISABLED1 +// +def ifdef_disabled1 : C; + +#define DISABLED2/*This one is disabled, + because DISABLED1 is. +*/ +#endif + +#ifdef ENABLED1 +def ifdef_enabled1 : C; +#endif + +#ifdef DISABLED2/* +*/ +def ifdef_disabled2 : C; +#endif + +/* +#ifdef ENABLED2 +def ifdef_enabled2 : C; +#endif +*/ + +//#ifdef DISABLED3 +def ifdef_disabled3 : C; + +//#endif + +#ifdef _DISABLED4 +def ifdef_disabled4 : C; +#else// /*!_DISABLED4 +def ifdef_disabled4_else : C; + +#define ENABLED3 +#endif + +#ifdef __DISABLED5 +def ifdef_disabled5 : C; +/* + +*/#else +#ifdef ENABLED3 +def ifdef_enabled3 : C; +#else /* //!ENABLED3 +*/ +def ifdef_disabled5_else : C; +#endif +#endif + +#define ENABLED4 +include "prep-region-include.td"//ENABLED5 is defined inside + +#ifdef ENABLED5 +def ifdef_enabled5 : C; +#endif // ENABLED5 + +#ifdef DISABLED6__ +// Double inclusion is an error. 
+include "prep-region-include.td" +#else +#endif + +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +#ifdef DIS +def ifdef_disabled6 : C; +#endif +#endif +#endif +#endif +#else +def ifdef_disabled6_else : C; +#endif +#endif +#endif +#endif +#endif +#endif +#else +#define ENAB//ENAB +#endif + +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +#ifdef ENAB +def ifdef_enabled6 : C; +#endif +#endif +#endif +#endif +#else +def ifdef_enabled6_else : C; +#endif +#endif +#endif +#endif +#endif +#endif +#endif + +#ifdef DISABLED_7 +include "non-existent-file.td" +#endif + +#ifdef DISABLED_8 +\\\\\ invalid TD text ///// +#endif // DISABLED_8