Index: docs/CommandGuide/FileCheck.rst =================================================================== --- docs/CommandGuide/FileCheck.rst +++ docs/CommandGuide/FileCheck.rst @@ -483,16 +483,39 @@ FileCheck Expressions ~~~~~~~~~~~~~~~~~~~~~ +:program:`FileCheck` also allows using the result of a simple arithmetic +expression involving variables. This makes it possible to capture relations +between variables, such as the need for consecutive registers to be used. The +syntax is ``[[<VAR><op><offset>]]`` where ``<VAR>`` is a :program:`FileCheck` +variable holding a decimal integer, ``<op>`` is a supported arithmetic operation +and ``<offset>`` is an integer. Currently supported operations are ``+`` and +``-``. For example: + +.. code-block:: llvm + + ; CHECK: op r[[REG:[0-9]+]], r[[REG]], r[[REG+1]] + +Important note: in the current implementation, a variable defined in a pattern +containing expressions must still match the right text when each variable +expression is replaced by the regular expression ``[[:digit:]]+``. This is because +variable expressions are not handled by the regular expression engine but by a +two-pass process: the first pass defines the variables and replaces each +expression with the above regular expression; the second pass then matches +again with each variable expression substituted by its value. + +FileCheck Pseudo Variables +~~~~~~~~~~~~~~~~~~~~~~~~~~ + Sometimes there's a need to verify output which refers line numbers of the match file, e.g. when testing compiler diagnostics. This introduces a certain fragility of the match file structure, as "``CHECK:``" lines contain absolute line numbers in the same file, which have to be updated whenever line numbers change due to text addition or deletion. -To support this case, FileCheck allows using ``[[@LINE]]``, -``[[@LINE+<offset>]]``, ``[[@LINE-<offset>]]`` expressions in patterns. These -expressions expand to a number of the line where a pattern is located (with an -optional integer offset). 
+To support this case, FileCheck understands the ``@LINE`` pseudo variable in +patterns in both regular variable uses ``[[@LINE]]`` and in expressions +``[[@LINE+<offset>]]``, ``[[@LINE-<offset>]]``. These expand to the number of +the line where the pattern is located (with an optional integer offset). This way match patterns can be put near the relevant test lines and include relative line number references, for example: Index: test/FileCheck/var-expression.txt =================================================================== --- /dev/null +++ test/FileCheck/var-expression.txt @@ -0,0 +1,36 @@ +; RUN: FileCheck -input-file %s %s + +; expression using variables defined on other lines +START +1 +2 +0 +3 +; expression using variables defined on same line +MARK1 +11 12 +11 10 +11 13 +; expression using variables redefined on same line +MARK2 +21 +22 31 32 +30 41 40 +END + +; Ensure we try to match each line with digits by wrapping them between START +; and END and using CHECK-NEXT to ensure each line is covered. +CHECK-LABEL: START +CHECK-NEXT: [[VAR1:[0-9]+]] +CHECK-NEXT: [[VAR1+1]] +CHECK-NEXT: [[VAR1-1]] +CHECK-NOT: [[VAR1+1]] +CHECK-LABEL: MARK1 +CHECK-NEXT: [[VAREL:[0-9]+]] [[VAREL+1]] +CHECK-NEXT: [[VAREL]] [[VAREL-1]] +CHECK-NOT: [[VAREL]] [[VAREL+1]] +CHECK-LABEL: MARK2 +CHECK-NEXT: [[VAR:[0-9]+]] +CHECK-NEXT: [[VAR+1]] [[VAR:[0-9]+]] [[VAR+1]] +CHECK-NEXT: [[VAR-1]] [[VAR:[0-9]+]] [[VAR-1]] +CHECK-NEXT: END Index: utils/FileCheck/FileCheck.cpp =================================================================== --- utils/FileCheck/FileCheck.cpp +++ utils/FileCheck/FileCheck.cpp @@ -109,6 +109,9 @@ } class Pattern { + /// Valid operations in expressions + static constexpr StringRef validArithOps = StringRef("+-", 2); + SMLoc PatternLoc; /// A fixed string to match as the pattern or empty if this pattern requires @@ -125,11 +128,12 @@ /// bar at offset 3. std::vector> VariableUses; - /// Maps definitions of variables to their parenthesized capture numbers. - /// - /// E.g. 
for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to - /// 1. - std::map VariableDefs; + /// Maps defined variables to each of their definitions' parenthesized + /// capture numbers and position in the pattern string. + /// + /// E.g. for the pattern "foo[[bar:.*]]baz[[bar:.*]]", VariableDefs will map + /// "bar" to the vector {{1, 3}, {2, 16}}. + std::map>> VariableDefs; Check::CheckType CheckTy; @@ -161,10 +165,21 @@ unsigned ComputeMatchDistance(StringRef Buffer, const StringMap &VariableTable) const; - bool EvaluateExpression(StringRef Expr, std::string &Value) const; + bool GetVariableValue(StringRef Name, unsigned index, + StringMap &GlobalVariableTable, + StringMap>> + &LocalVariableTable, + StringRef &Value) const; + bool EvaluateExpression(StringRef Expr, unsigned index, + StringMap &GlobalVariableTable, + StringMap>> + &LocalVariableTable, + std::string &Value, bool CheckOnly) const; size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); }; +constexpr StringRef Pattern::validArithOps; + /// Parses the given string into the Pattern. /// /// \p Prefix provides which prefix is being matched, \p SM provides the @@ -281,16 +296,25 @@ return true; } - // Verify that the name/expression is well formed. FileCheck currently - // supports @LINE, @LINE+number, @LINE-number expressions. The check here - // is relaxed, more strict check is performed in \c EvaluateExpression. + // Verify that the name/expression is well formed. Variable names are + // composed of alphanumeric characters and underscores. Expressions + // are defined by what EvaluateExpression accepts. 
bool IsExpression = false; for (unsigned i = 0, e = Name.size(); i != e; ++i) { if (i == 0) { + bool TryExpression = + (Name.find_first_of(validArithOps) != StringRef::npos); if (Name[i] == '$') // Global vars start with '$' continue; - if (Name[i] == '@') { - if (NameEnd != StringRef::npos) { + if (Name[i] == '@' || TryExpression) { + std::string Value; + StringMap VariableTable; + StringMap>> + LocalVariableTable; + if (NameEnd != StringRef::npos || + !EvaluateExpression(Name, RegExStr.size(), VariableTable, + LocalVariableTable, Value, + true /*CheckOnly*/)) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, "invalid name in named regex definition"); @@ -300,8 +324,7 @@ continue; } } - if (Name[i] != '_' && !isalnum(Name[i]) && - (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { + if (Name[i] != '_' && !isalnum(Name[i]) && !IsExpression) { SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), SourceMgr::DK_Error, "invalid name in named regex"); return true; @@ -317,10 +340,18 @@ // Handle [[foo]]. if (NameEnd == StringRef::npos) { - // Handle variables that were defined earlier on the same line by - // emitting a backreference. - if (VariableDefs.find(Name) != VariableDefs.end()) { - unsigned VarParenNum = VariableDefs[Name]; + StringRef Expr = Name; + + // Get the name part of expressions. + if (IsExpression) { + StringRef::size_type OpStart = Expr.find_first_of(validArithOps); + Name = Expr.substr(0, OpStart); + } + + // Handle non-expression use of variables that were defined earlier on + // the same line by emitting a backreference. 
+ if (VariableDefs.find(Name) != VariableDefs.end() && !IsExpression) { + unsigned VarParenNum = (--(VariableDefs[Name].end()))->first; if (VarParenNum < 1 || VarParenNum > 9) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, @@ -329,13 +360,16 @@ } AddBackrefToRegEx(VarParenNum); } else { - VariableUses.push_back(std::make_pair(Name, RegExStr.size())); + VariableUses.push_back(std::make_pair(Expr, RegExStr.size())); } continue; } // Handle [[foo:.*]]. - VariableDefs[Name] = CurParen; + if (VariableDefs.find(Name) == VariableDefs.end()) + VariableDefs[Name] = std::vector>(); + VariableDefs[Name].emplace_back( + std::make_pair(CurParen, RegExStr.size())); RegExStr += '('; ++CurParen; @@ -382,24 +416,125 @@ RegExStr += Backref; } -/// Evaluates expression and stores the result to \p Value. +/// Evaluates expression \p Expr at \p index in the pattern and stores the +/// result to \p Value. \p GlobalVariableTable holds the values of variables +/// defined in patterns on earlier lines while \p LocalVariableTable holds the +/// values of all definitions of variables defined on the same line. When +/// \p CheckOnly is set, only the expression's form is checked (the offset is +/// not parsed) and neither variable table is used. /// -/// Returns true on success and false when the expression has invalid syntax. -bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { - // The only supported expression is @LINE([\+-]\d+)? - if (!Expr.startswith("@LINE")) - return false; - Expr = Expr.substr(StringRef("@LINE").size()); - int Offset = 0; - if (!Expr.empty()) { - if (Expr[0] == '+') - Expr = Expr.substr(1); - else if (Expr[0] != '-') +/// Valid expressions are the addition of a constant to, or the subtraction of +/// a constant from, a user-defined variable or the @LINE pseudo variable. For +/// the latter the operation is optional, i.e. @LINE is a valid expression. +/// +/// Returns whether the arithmetic expression is valid. 
+bool Pattern::EvaluateExpression( + StringRef Expr, unsigned index, StringMap &GlobalVariableTable, + StringMap>> &LocalVariableTable, + std::string &Value, bool CheckOnly) const { + int val; + StringRef VarName; + if (Expr.startswith("@LINE")) { + Expr = Expr.substr(1); + VarName = "LINE"; + if (!CheckOnly) + val = LineNumber; + } else { + StringRef::size_type OpStart = Expr.find_first_of(validArithOps); + // Reject reference to user-defined variable if no arithmetic + // expression is involved. + if (OpStart == StringRef::npos) + return false; + VarName = Expr.substr(0, OpStart); + if (!CheckOnly) { + StringRef ValueRef; + // Undefined variable. + if (!GetVariableValue(Expr, index, GlobalVariableTable, + LocalVariableTable, ValueRef)) + return false; + if (ValueRef.getAsInteger(10, val)) + return false; + } + } + + Expr = Expr.substr(VarName.size()); + if (Expr.empty() && CheckOnly) + return true; + else if (!Expr.empty()) { + // Invalid arithmetic operation + if (StringRef(validArithOps).find_first_of(Expr[0]) == StringRef::npos) return false; - if (Expr.getAsInteger(10, Offset)) + else if (CheckOnly) + return true; + + char op = Expr[0]; + Expr = Expr.substr(1); + int offset = 0; + if (Expr.getAsInteger(10, offset)) return false; + switch (op) { + case '+': + val += offset; + break; + case '-': + val -= offset; + break; + default: + return false; + } + } + Value = llvm::itostr(val); + return true; +} + +/// Evaluates use of variable Name at \p index in the pattern and stores the +/// result to \p Value. GlobalVariableTable holds the value of variables +/// defined in patterns in earlier lines while LocalVariableTable holds the +/// values of all definitions of variables defined in the same line. +/// +/// Returns whether the variable used was defined. 
+bool Pattern::GetVariableValue( + StringRef Name, unsigned index, StringMap &GlobalVariableTable, + StringMap>> &LocalVariableTable, + StringRef &Value) const { + bool isExpression = (Name.find_first_of(validArithOps) != StringRef::npos); + + // Expressions cannot use a backreference when referencing a variable defined + // earlier on the same line because the resulting value differs from the one + // defined. Therefore we need to explicitly check here whether the variable + // was defined on the same line at an earlier index for expressions. + if (isExpression) { + Name = Name.substr(0, Name.find_first_of(validArithOps)); + auto VariableDefIterator = VariableDefs.find(Name); + // At least one definition of this variable exists on the same line at an + // earlier index. Search for its value. + if (VariableDefIterator != VariableDefs.end() && + VariableDefIterator->second.begin()->second < index) { + auto LocalVariableIterator = LocalVariableTable.find(Name); + assert(LocalVariableIterator != LocalVariableTable.end()); + + auto VariableVectorIterator = LocalVariableIterator->second.begin(); + assert(VariableVectorIterator != LocalVariableIterator->second.end()); + for (; VariableVectorIterator != LocalVariableIterator->second.end() && + VariableVectorIterator->second < index; + ++VariableVectorIterator) + ; + --VariableVectorIterator; + assert(VariableVectorIterator->second < index); + + Value = VariableVectorIterator->first; + return true; + } } - Value = llvm::itostr(LineNumber + Offset); + + // No local definition found or this is a non-expression variable use, in + // which case it refers to a definition on another line since same-line uses + // are handled by back-references. Search for its earlier definition. 
+ StringMap::iterator git = GlobalVariableTable.find(Name); + if (git == GlobalVariableTable.end()) + return false; + + Value = git->second; return true; } @@ -409,10 +544,10 @@ /// there is a match, the size of the matched string is returned in \p /// MatchLen. /// -/// The \p VariableTable StringMap provides the current values of filecheck -/// variables and is updated if this match defines new values. +/// The \p GlobalVariableTable StringMap provides the current values of +/// filecheck variables and is updated if this match defines new values. size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, - StringMap &VariableTable) const { + StringMap &GlobalVariableTable) const { // If this is the EOF pattern, match it immediately. if (CheckTy == Check::CheckEOF) { MatchLen = 0; @@ -431,56 +566,134 @@ // actual value. StringRef RegExToMatch = RegExStr; std::string TmpStr; - if (!VariableUses.empty()) { - TmpStr = RegExStr; - - unsigned InsertOffset = 0; - for (const auto &VariableUse : VariableUses) { - std::string Value; + StringMap>> LocalVariableTable = + {}; + SmallVector MatchInfo; + enum MatchState { Unprocessed, DefProcessed, ExprProcessed }; + enum MatchState State = Unprocessed; + + // Do two passes over the pattern to handle expressions using variable + // defined earlier on the same line. First pass will replace all other uses + // of variable by their value and then do temporary records of the value of + // variables defined in the pattern. Second pass replaces all uses, using + // that temporary record. Note that non-expression variable use of variables + // defined on the same line are handled via back-reference and thus are not + // replaced by any of those passes. 
+ while (State != ExprProcessed) { + if (!VariableUses.empty()) { + TmpStr = RegExStr; + + unsigned InsertOffset = 0; + for (const auto &VariableUse : VariableUses) { + std::string Value; + StringRef Expr = VariableUse.first; + bool isPseudo = (Expr[0] == '@'); + bool isExpression = + (Expr.find_first_of(validArithOps) != StringRef::npos); + + // Pseudo variable (eg. @LINE) + if (isPseudo) { + if (!EvaluateExpression(Expr, VariableUse.second, GlobalVariableTable, + LocalVariableTable, Value, + false /*CheckOnly*/)) + return StringRef::npos; + } else { + StringRef ValueRef; + bool VarFound = false; + + // Wait second pass to replace uses in expression + if (State >= DefProcessed || !isExpression) + VarFound = + GetVariableValue(Expr, VariableUse.second, GlobalVariableTable, + LocalVariableTable, ValueRef); + + if (VarFound) { + if (isExpression) { + if (!EvaluateExpression(Expr, VariableUse.second, + GlobalVariableTable, LocalVariableTable, + Value, false /*CheckOnly*/)) + return StringRef::npos; + } else { + // Look up the value and escape it so that we can put it into the + // regex. + Value += Regex::escape(ValueRef); + } + } else { + // During first pass, allow any number of digits instead of the + // expressions. This allows more matches in this pass than the + // actual pattern which might lead to the wrong temporary record + // of variable definition at the end of the pass. This in turns + // might resolve expression incorrectly in the second pass and + // give incorrect match, hence the documented limitations on use + // of expressions. + if (isExpression && State < DefProcessed) { + Value += std::string("[[:digit:]]+"); + } else { + // If the variable is undefined, return an error. 
+ return StringRef::npos; + } + } + } - if (VariableUse.first[0] == '@') { - if (!EvaluateExpression(VariableUse.first, Value)) - return StringRef::npos; - } else { - StringMap::iterator it = - VariableTable.find(VariableUse.first); - // If the variable is undefined, return an error. - if (it == VariableTable.end()) - return StringRef::npos; - - // Look up the value and escape it so that we can put it into the regex. - Value += Regex::escape(it->second); + // Plop it into the regex at the adjusted offset. + TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, + Value.begin(), Value.end()); + InsertOffset += Value.size(); } - // Plop it into the regex at the adjusted offset. - TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, - Value.begin(), Value.end()); - InsertOffset += Value.size(); + // Match the newly constructed regex. + RegExToMatch = TmpStr; } - // Match the newly constructed regex. - RegExToMatch = TmpStr; - } + if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) + return StringRef::npos; - SmallVector MatchInfo; - if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) - return StringRef::npos; + // Successful regex match. + assert(!MatchInfo.empty() && "Didn't get any match"); + + // If this defines any variables, temporarily record their values for each + // of the definition to resolve expression in the next pass. + if (State == Unprocessed) { + for (const auto &VariableDef : VariableDefs) { + for (auto VariableDefIt : VariableDef.second) { + assert(VariableDefIt.first < MatchInfo.size() && + "Internal paren error"); + if (LocalVariableTable.find(VariableDef.first) == + LocalVariableTable.end()) + LocalVariableTable[VariableDef.first] = + std::vector>(); + LocalVariableTable[VariableDef.first].emplace_back(std::make_pair( + MatchInfo[VariableDefIt.first], VariableDefIt.second)); + } + } + } - // Successful regex match. 
- assert(!MatchInfo.empty() && "Didn't get any match"); - StringRef FullMatch = MatchInfo[0]; + switch (State) { + case Unprocessed: + State = DefProcessed; + break; + case DefProcessed: + State = ExprProcessed; + break; + default: + assert(0); + break; + } + } // If this defines any variables, remember their values. for (const auto &VariableDef : VariableDefs) { - assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); - VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; + assert((--(VariableDef.second.end()))->first < MatchInfo.size() && + "Internal paren error"); + GlobalVariableTable[VariableDef.first] = + MatchInfo[(--(VariableDef.second.end()))->first]; } + StringRef FullMatch = MatchInfo[0]; MatchLen = FullMatch.size(); return FullMatch.data() - Buffer.data(); } - /// Computes an arbitrary estimate for the quality of matching this pattern at /// the start of \p Buffer; a distance of zero should correspond to a perfect /// match. @@ -515,9 +728,16 @@ SmallString<256> Msg; raw_svector_ostream OS(Msg); StringRef Var = VariableUse.first; - if (Var[0] == '@') { + bool TryExpression = + (Var.find_first_of(validArithOps) != StringRef::npos); + if (Var[0] == '@' || TryExpression) { std::string Value; - if (EvaluateExpression(Var, Value)) { + StringMap VariableTable; + StringMap>> + LocalVariableTable; + if (EvaluateExpression(Var, VariableUse.second, VariableTable, + LocalVariableTable, Value, + false /*CheckOnly*/)) { OS << "with expression \""; OS.write_escaped(Var) << "\" equal to \""; OS.write_escaped(Value) << "\"";