diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -611,15 +611,15 @@ /// Parse up to a token of kind \p EndTok and return the contents from the /// current token up to (but not including) this token; the current token on - /// exit will be either this kind or EOF. - StringRef parseStringTo(AsmToken::TokenKind EndTok); + /// exit will be either this kind or EOF. Reads through instantiated macro + /// functions and text macros. + SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok); + std::string parseStringTo(AsmToken::TokenKind EndTok); /// Parse up to the end of statement and return the contents from the current /// token until the end of the statement; the current token on exit will be /// either the EndOfStatement or EOF. - StringRef parseStringToEndOfStatement() override { - return parseStringTo(AsmToken::EndOfStatement); - } + StringRef parseStringToEndOfStatement() override; bool parseTextItem(std::string &Data); @@ -1122,8 +1122,11 @@ const AsmToken *tok = &Lexer.Lex(); while (tok->is(AsmToken::Identifier)) { - auto it = Variables.find(tok->getIdentifier()); + auto it = Variables.find(tok->getIdentifier().lower()); + const llvm::MCAsmMacro *M = + getContext().lookupMacro(tok->getIdentifier().lower()); if (it != Variables.end() && it->second.IsText) { + // This is a textmacro; expand it in place. std::unique_ptr<MemoryBuffer> Instantiation = MemoryBuffer::getMemBufferCopy(it->second.TextValue, "<instantiation>"); @@ -1135,6 +1138,15 @@ /*EndStatementAtEOF=*/false); EndStatementAtEOFStack.push_back(false); tok = &Lexer.Lex(); + } else if (M && M->Function && Lexer.peekTok().is(AsmToken::LParen)) { + // This is a macro function invocation; expand it in place. 
+ const AsmToken MacroTok = *tok; + tok = &Lexer.Lex(); + if (handleMacroInvocation(M, MacroTok.getLoc())) { + Lexer.UnLex(AsmToken(AsmToken::Error, MacroTok.getIdentifier())); + tok = &Lexer.Lex(); + } + continue; } else { break; } @@ -1321,18 +1333,62 @@ /// Throw away the rest of the line for testing purposes. void MasmParser::eatToEndOfStatement() { - while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) + while (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.is(AsmToken::Eof)) { + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc == SMLoc()) { + break; + } + + EndStatementAtEOFStack.pop_back(); + jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back()); + } + Lexer.Lex(); + } // Eat EOL. if (Lexer.is(AsmToken::EndOfStatement)) Lexer.Lex(); } -StringRef MasmParser::parseStringTo(AsmToken::TokenKind EndTok) { +SmallVector<StringRef, 1> +MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) { + SmallVector<StringRef, 1> Refs; const char *Start = getTok().getLoc().getPointer(); + while (Lexer.isNot(EndTok)) { + if (Lexer.is(AsmToken::Eof)) { + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc == SMLoc()) { + break; + } + Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start); - while (Lexer.isNot(EndTok) && Lexer.isNot(AsmToken::Eof)) + EndStatementAtEOFStack.pop_back(); + jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back()); + Lexer.Lex(); + Start = getTok().getLoc().getPointer(); + } else { + Lexer.Lex(); + } + } + Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start); + return Refs; +} + +std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) { + SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok); + std::string Str; + for (StringRef S : Refs) { + Str.append(S.str()); + } + return Str; +} + +StringRef MasmParser::parseStringToEndOfStatement() { + const char *Start = getTok().getLoc().getPointer(); + + while (Lexer.isNot(AsmToken::EndOfStatement) 
&& Lexer.isNot(AsmToken::Eof)) Lexer.Lex(); const char *End = getTok().getLoc().getPointer(); @@ -1370,7 +1426,6 @@ /// Parse a primary expression and return it. /// primaryexpr ::= (parenexpr -/// primaryexpr ::= macro_function "(" macro_arguments ")" /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= '.' @@ -1420,12 +1475,6 @@ Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); return false; } - // Parse macro function invocation. - if (const MCAsmMacro *M = getContext().lookupMacro(Identifier)) { - if (handleMacroInvocation(M, FirstTokenLoc)) - return true; - return parsePrimaryExpr(Res, EndLoc, nullptr); - } // Parse symbol variant. std::pair<StringRef, StringRef> Split; if (!MAI.useParensForSymbolVariant()) { @@ -2090,7 +2139,7 @@ // does not understand Labels. This may cause us to see a Hash // here instead of a preprocessor line comment. if (getTok().is(AsmToken::Hash)) { - StringRef CommentStr = parseStringToEndOfStatement(); + std::string CommentStr = parseStringTo(AsmToken::EndOfStatement); Lexer.Lex(); Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr)); } @@ -2123,12 +2172,8 @@ } // If macros are enabled, check to see if this is a macro instantiation. - if (const MCAsmMacro *M = getContext().lookupMacro(IDVal)) { - if (M->Function) { - return handleMacroInvocation(M, IDLoc); - } else { - return handleMacroEntry(M, IDLoc); - } + if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) { + return handleMacroEntry(M, IDLoc); } // Otherwise, we have a normal instruction or directive. 
@@ -2728,7 +2773,7 @@ ++I; ++Pos; } - while (isMacroParameterChar(Body[I]) && I + 1 != End) + while (I < End && isMacroParameterChar(Body[I])) ++I; const char *Begin = Body.data() + Pos; @@ -2829,11 +2874,14 @@ AsmToken::TokenKind EndTok) { if (MP && MP->Vararg) { if (Lexer.isNot(EndTok)) { - StringRef Str = parseStringTo(EndTok); - MA.emplace_back(AsmToken::String, Str); + SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok); + for (StringRef S : Str) { + MA.emplace_back(AsmToken::String, S); + } } return false; } + SMLoc StrLoc = Lexer.getLoc(), EndLoc; if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) { const char *StrChar = StrLoc.getPointer() + 1; @@ -2863,7 +2911,7 @@ if (Lexer.is(AsmToken::Space)) { SpaceEaten = true; - Lexer.Lex(); // Eat spaces. + Lex(); // Eat spaces. } // Spaces can delimit parameters, but could also be part an expression. @@ -2872,11 +2920,11 @@ if (!IsDarwin) { if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) { MA.push_back(getTok()); - Lexer.Lex(); + Lex(); // Whitespace after an operator can be ignored. if (Lexer.is(AsmToken::Space)) - Lexer.Lex(); + Lex(); continue; } @@ -2898,7 +2946,7 @@ // Append the token to the current argument list. 
MA.push_back(getTok()); - Lexer.Lex(); + Lex(); } if (ParenLevel != 0) @@ -5548,7 +5596,7 @@ eatToEndOfStatement(); } - if (getContext().lookupMacro(Name)) { + if (getContext().lookupMacro(Name.lower())) { return Error(NameLoc, "macro '" + Name + "' is already defined"); } @@ -5620,9 +5668,9 @@ DEBUG_WITH_TYPE("asm-macros", dbgs() << "Un-defining macro: " << Name << "\n"); - if (!getContext().lookupMacro(Name)) + if (!getContext().lookupMacro(Name.lower())) return Error(NameLoc, "macro '" + Name + "' is not defined"); - getContext().undefineMacro(Name); + getContext().undefineMacro(Name.lower()); if (!parseOptionalToken(AsmToken::Comma)) break; @@ -5736,16 +5784,17 @@ /// [[text]] /// [[text]] delimiter [[text]] bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) { - StringRef FirstLine = parseStringToEndOfStatement(); + std::string FirstLine = parseStringTo(AsmToken::EndOfStatement); size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A "); - StringRef Delimiter = FirstLine.take_front(DelimiterEnd); + StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd); if (Delimiter.empty()) return Error(DirectiveLoc, "no delimiter in 'comment' directive"); do { if (getTok().is(AsmToken::Eof)) return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive"); Lex(); // eat end of statement - } while (!parseStringToEndOfStatement().contains(Delimiter)); + } while ( + !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter)); return parseToken(AsmToken::EndOfStatement, "unexpected token in 'comment' directive"); } @@ -5759,7 +5808,7 @@ SMLoc IncludeLoc = getTok().getLoc(); if (!parseAngleBracketString(Filename)) - Filename = parseStringToEndOfStatement().str(); + Filename = parseStringTo(AsmToken::EndOfStatement); if (check(!Filename.empty(), "missing filename in 'include' directive") || check(getTok().isNot(AsmToken::EndOfStatement), "unexpected token in 'include' directive") || @@ -6131,9 +6180,9 @@ } } - StringRef Message = 
".err directive invoked in source file"; + std::string Message = ".err directive invoked in source file"; if (Lexer.isNot(AsmToken::EndOfStatement)) - Message = parseStringToEndOfStatement(); + Message = parseStringTo(AsmToken::EndOfStatement); Lex(); return Error(DirectiveLoc, Message); @@ -6153,11 +6202,11 @@ if (parseTextItem(Text)) return Error(getTok().getLoc(), "missing text item in '.errb' directive"); - StringRef Message = ".errb directive invoked in source file"; + std::string Message = ".errb directive invoked in source file"; if (Lexer.isNot(AsmToken::EndOfStatement)) { if (parseToken(AsmToken::Comma)) return addErrorSuffix(" in '.errb' directive"); - Message = parseStringToEndOfStatement(); + Message = parseStringTo(AsmToken::EndOfStatement); } Lex(); @@ -6195,11 +6244,11 @@ } } - StringRef Message = ".errdef directive invoked in source file"; + std::string Message = ".errdef directive invoked in source file"; if (Lexer.isNot(AsmToken::EndOfStatement)) { if (parseToken(AsmToken::Comma)) return addErrorSuffix(" in '.errdef' directive"); - Message = parseStringToEndOfStatement(); + Message = parseStringTo(AsmToken::EndOfStatement); } Lex(); @@ -6245,7 +6294,7 @@ return TokError("expected string parameter for '.errdif' directive"); } - StringRef Message; + std::string Message; if (ExpectEqual) Message = ".erridn directive invoked in source file"; else @@ -6253,7 +6302,7 @@ if (Lexer.isNot(AsmToken::EndOfStatement)) { if (parseToken(AsmToken::Comma)) return addErrorSuffix(" in '.erridn' directive"); - Message = parseStringToEndOfStatement(); + Message = parseStringTo(AsmToken::EndOfStatement); } Lex(); @@ -6285,11 +6334,11 @@ if (parseAbsoluteExpression(ExprValue)) return addErrorSuffix(" in '.erre' directive"); - StringRef Message = ".erre directive invoked in source file"; + std::string Message = ".erre directive invoked in source file"; if (Lexer.isNot(AsmToken::EndOfStatement)) { if (parseToken(AsmToken::Comma)) return addErrorSuffix(" in '.erre' 
directive"); - Message = parseStringToEndOfStatement(); + Message = parseStringTo(AsmToken::EndOfStatement); } Lex(); @@ -6499,12 +6548,9 @@ } bool MasmParser::expandStatement(SMLoc Loc) { - SMLoc StartLoc = getTok().getLoc(); - eatToEndOfStatement(); + std::string Body = parseStringTo(AsmToken::EndOfStatement); SMLoc EndLoc = getTok().getLoc(); - StringRef Body = StringRef(StartLoc.getPointer(), - EndLoc.getPointer() - StartLoc.getPointer()); MCAsmMacroParameters Parameters; MCAsmMacroArguments Arguments; for (const auto &V : Variables) { @@ -6735,7 +6781,7 @@ // Match ml64.exe; treat all characters to end of statement as a string, // ignoring comment markers, then discard anything following a space (using // the C locale). - Argument = parseStringToEndOfStatement().str(); + Argument = parseStringTo(AsmToken::EndOfStatement); if (getTok().is(AsmToken::EndOfStatement)) Argument += getTok().getString(); int End = 0; @@ -6807,7 +6853,8 @@ bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) { const SMLoc Loc = getLexer().getLoc(); - StringRef RadixString = parseStringToEndOfStatement().trim(); + std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement); + StringRef RadixString = StringRef(RadixStringRaw).trim(); unsigned Radix; if (RadixString.getAsInteger(10, Radix)) { return Error(Loc, @@ -6831,13 +6878,9 @@ getTok().getString().equals_lower("echo")); Lexer.Lex(); - SMLoc StartLoc = getTok().getLoc(); - eatToEndOfStatement(); - SMLoc EndLoc = getTok().getLoc(); - StringRef Message = StringRef(StartLoc.getPointer(), - EndLoc.getPointer() - StartLoc.getPointer()); + std::string Message = parseStringTo(AsmToken::EndOfStatement); llvm::outs() << Message; - if (Message.back() != '\n') + if (!StringRef(Message).endswith("\n")) llvm::outs() << '\n'; return false; } diff --git a/llvm/test/tools/llvm-ml/macro_function.test b/llvm/test/tools/llvm-ml/macro_function.test --- a/llvm/test/tools/llvm-ml/macro_function.test +++ 
b/llvm/test/tools/llvm-ml/macro_function.test @@ -103,4 +103,14 @@ ret expr_recursive_test ENDP +custom_strcat MACRO arg1, arg2 + EXITM <arg1&arg2> +ENDM + +expand_as_directive_test custom_strcat(P, ROC) +; CHECK-LABEL: expand_as_directive_test: + + ret +expand_as_directive_test ENDP + end