diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -56,6 +57,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" @@ -65,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -389,7 +392,6 @@ StringRef Name; RedefinableKind Redefinable = REDEFINABLE; bool IsText = false; - int64_t NumericValue = 0; std::string TextValue; }; StringMap Variables; @@ -796,6 +798,37 @@ /// def_range types parsed by this class. StringMap CVDefRangeTypeMap; + // Generic (target and platform independent) directive parsing. + enum BuiltinSymbol { + BI_NO_SYMBOL, // Placeholder + BI_DATE, + BI_TIME, + BI_VERSION, + BI_FILECUR, + BI_FILENAME, + BI_LINE, + BI_CURSEG, + BI_CPU, + BI_INTERFACE, + BI_CODE, + BI_DATA, + BI_FARDATA, + BI_WORDSIZE, + BI_CODESIZE, + BI_DATASIZE, + BI_MODEL, + BI_STACK, + }; + + /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this + /// class. + StringMap BuiltinSymbolMap; + + const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc); + + llvm::Optional evaluateBuiltinTextMacro(BuiltinSymbol Symbol, + SMLoc StartLoc); + // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); @@ -1019,6 +1052,7 @@ void initializeDirectiveKindMap(); void initializeCVDefRangeTypeMap(); + void initializeBuiltinSymbolMap(); }; } // end anonymous namespace @@ -1057,6 +1091,7 @@ initializeDirectiveKindMap(); PlatformParser->Initialize(*this); initializeCVDefRangeTypeMap(); + initializeBuiltinSymbolMap(); NumOfMacroInstantiations = 0; } @@ -1125,25 +1160,9 @@ bool MasmParser::expandMacros() { const AsmToken &Tok = getTok(); + const std::string IDLower = Tok.getIdentifier().lower(); - auto VarIt = Variables.find(Tok.getIdentifier().lower()); - if (VarIt != Variables.end() && VarIt->second.IsText) { - std::unique_ptr Instantiation = - MemoryBuffer::getMemBufferCopy(VarIt->second.TextValue, - ""); - - // Jump to the macro instantiation and prime the lexer. - CurBuffer = - SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc()); - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, - /*EndStatementAtEOF=*/false); - EndStatementAtEOFStack.push_back(false); - Lexer.Lex(); - return false; - } - - const llvm::MCAsmMacro *M = - getContext().lookupMacro(Tok.getIdentifier().lower()); + const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower); if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) { // This is a macro function invocation; expand it in place. const SMLoc MacroLoc = Tok.getLoc(); @@ -1156,7 +1175,29 @@ return false; } - return true; + llvm::Optional ExpandedValue; + auto BuiltinIt = BuiltinSymbolMap.find(IDLower); + auto VarIt = Variables.find(IDLower); + if (BuiltinIt != BuiltinSymbolMap.end()) { + ExpandedValue = + evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc()); + } else if (VarIt != Variables.end() && VarIt->getValue().IsText) { + ExpandedValue = VarIt->getValue().TextValue; + } + + if (!ExpandedValue.hasValue()) + return true; + std::unique_ptr Instantiation = + MemoryBuffer::getMemBufferCopy(*ExpandedValue, ""); + + // Jump to the macro instantiation and prime the lexer. + CurBuffer = + SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc()); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, + /*EndStatementAtEOF=*/false); + EndStatementAtEOFStack.push_back(false); + Lexer.Lex(); + return false; } const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) { @@ -1610,6 +1651,19 @@ MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName); if (!Sym) { + // If this is a built-in numeric value, treat it as a constant. + auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower()); + const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end()) + ? BI_NO_SYMBOL + : BuiltinIt->getValue(); + if (Symbol != BI_NO_SYMBOL) { + const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc); + if (Value) { + Res = Value; + return false; + } + } + // Variables use case-insensitive symbol names; if this is a variable, we // find the symbol using its canonical name. auto VarIt = Variables.find(SymbolName.lower()); @@ -2896,16 +2950,17 @@ const char *Begin = Body.data() + Pos; StringRef Argument(Begin, I - Pos); + const std::string ArgumentLower = Argument.lower(); unsigned Index = 0; for (; Index < NParameters; ++Index) - if (Parameters[Index].Name == Argument) + if (Parameters[Index].Name.equals_insensitive(ArgumentLower)) break; if (Index == NParameters) { if (InitialAmpersand) OS << '&'; - auto it = LocalSymbols.find(Argument.lower()); + auto it = LocalSymbols.find(ArgumentLower); if (it != LocalSymbols.end()) OS << it->second; else @@ -3377,6 +3432,10 @@ /// | name "textequ" text-list (redefinability unspecified) bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, DirectiveKind DirKind, SMLoc NameLoc) { + auto BuiltinIt = BuiltinSymbolMap.find(Name.lower()); + if (BuiltinIt != BuiltinSymbolMap.end()) + return Error(NameLoc, "cannot redefine a built-in symbol"); + Variable &Var = Variables[Name.lower()]; if (Var.Name.empty()) { Var.Name = Name; @@ -3429,12 +3488,18 @@ SMLoc EndLoc; if (parseExpression(Expr, EndLoc)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + StringRef ExprAsString = StringRef( + StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer()); int64_t Value; if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) { + if (DirKind == DK_ASSIGN) + return Error( + StartLoc, + "expected absolute expression; not all symbols have known values", + {StartLoc, EndLoc}); + // Not an absolute expression; define as a text replacement. - StringRef ExprAsString = StringRef( - StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer()); if (!Var.IsText || Var.TextValue != ExprAsString) { switch (Var.Redefinable) { case Variable::NOT_REDEFINABLE: @@ -3449,29 +3514,40 @@ break; } } + Var.IsText = true; Var.TextValue = ExprAsString.str(); - } else { - if (Var.IsText || Var.NumericValue != Value) { - switch (Var.Redefinable) { - case Variable::NOT_REDEFINABLE: - return Error(getTok().getLoc(), "invalid variable redefinition"); - case Variable::WARN_ON_REDEFINITION: - if (Warning(NameLoc, "redefining '" + Name + - "', already defined on the command line")) { - return true; - } - break; - default: - break; + Var.Redefinable = Variable::REDEFINABLE; + + return false; + } + + MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); + + const MCConstantExpr *PrevValue = + Sym->isVariable() ? dyn_cast_or_null( + Sym->getVariableValue(/*SetUsed*/ false)) + : nullptr; + if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) { + switch (Var.Redefinable) { + case Variable::NOT_REDEFINABLE: + return Error(getTok().getLoc(), "invalid variable redefinition"); + case Variable::WARN_ON_REDEFINITION: + if (Warning(NameLoc, "redefining '" + Name + + "', already defined on the command line")) { + return true; } + break; + default: + break; } - Var.NumericValue = Value; } + + Var.IsText = false; + Var.TextValue.clear(); Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE : Variable::NOT_REDEFINABLE; - MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE); Sym->setVariableValue(Expr); Sym->setExternal(false); @@ -3539,28 +3615,43 @@ case AsmToken::Identifier: { // This must be a text macro; we need to expand it accordingly. StringRef ID; + SMLoc StartLoc = getTok().getLoc(); if (parseIdentifier(ID)) return true; Data = ID.str(); - auto it = Variables.find(ID.lower()); - if (it == Variables.end()) { - // Not a variable; since we haven't used the token, put it back for better - // error recovery. + auto BuiltinIt = BuiltinSymbolMap.find(ID.lower()); + auto VarIt = Variables.find(ID.lower()); + if (BuiltinIt == BuiltinSymbolMap.end() && VarIt == Variables.end()) { + // Not a variable or built-in text macro; since we haven't used the token, + // put it back for better error recovery. getLexer().UnLex(AsmToken(AsmToken::Identifier, ID)); return true; } - while (it != Variables.end()) { - const Variable &Var = it->second; - if (!Var.IsText) { - // Not a text macro; not usable in TextItem context. Since we haven't - // used the token, put it back for better error recovery. - getLexer().UnLex(AsmToken(AsmToken::Identifier, ID)); - return true; + while (BuiltinIt != BuiltinSymbolMap.end() || VarIt != Variables.end()) { + if (BuiltinIt != BuiltinSymbolMap.end()) { + llvm::Optional BuiltinText = + evaluateBuiltinTextMacro(BuiltinIt->second, StartLoc); + if (!BuiltinText.hasValue()) { + // Not a text macro; not usable in TextItem context. Since we haven't + // used the token, put it back for better error recovery. + getLexer().UnLex(AsmToken(AsmToken::Identifier, ID)); + return true; + } + Data = BuiltinText.getValue(); + } else { + const Variable &Var = VarIt->second; + if (!Var.IsText) { + // Not a text macro; not usable in TextItem context. Since we haven't + // used the token, put it back for better error recovery. + getLexer().UnLex(AsmToken(AsmToken::Identifier, ID)); + return true; + } + Data = Var.TextValue; } - Data = Var.TextValue; - it = Variables.find(StringRef(Data).lower()); + VarIt = Variables.find(StringRef(Data).lower()); + BuiltinIt = BuiltinSymbolMap.find(StringRef(Data).lower()); } return false; } @@ -6181,7 +6272,9 @@ parseToken(AsmToken::EndOfStatement, "unexpected token in 'ifdef'")) return true; - if (Variables.find(Name.lower()) != Variables.end()) { + if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { + is_defined = true; + } else if (Variables.find(Name.lower()) != Variables.end()) { is_defined = true; } else { MCSymbol *Sym = getContext().lookupSymbol(Name.lower()); @@ -6303,7 +6396,9 @@ "unexpected token in 'elseifdef'")) return true; - if (Variables.find(Name.lower()) != Variables.end()) { + if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { + is_defined = true; + } else if (Variables.find(Name.lower()) != Variables.end()) { is_defined = true; } else { MCSymbol *Sym = getContext().lookupSymbol(Name); @@ -6473,7 +6568,9 @@ if (check(parseIdentifier(Name), "expected identifier after '.errdef'")) return true; - if (Variables.find(Name.lower()) != Variables.end()) { + if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { + IsDefined = true; + } else if (Variables.find(Name.lower()) != Variables.end()) { IsDefined = true; } else { MCSymbol *Sym = getContext().lookupSymbol(Name); @@ -6788,16 +6885,36 @@ MCAsmMacroParameters Parameters; MCAsmMacroArguments Arguments; + + StringMap BuiltinValues; + for (const auto &S : BuiltinSymbolMap) { + const BuiltinSymbol &Sym = S.getValue(); + if (llvm::Optional Text = evaluateBuiltinTextMacro(Sym, Loc)) { + BuiltinValues[S.getKey().lower()] = std::move(*Text); + } + } + for (const auto &B : BuiltinValues) { + MCAsmMacroParameter P; + MCAsmMacroArgument A; + P.Name = B.getKey(); + P.Required = true; + A.push_back(AsmToken(AsmToken::String, B.getValue())); + + Parameters.push_back(std::move(P)); + Arguments.push_back(std::move(A)); + } + for (const auto &V : Variables) { const Variable &Var = V.getValue(); if (Var.IsText) { - Parameters.emplace_back(); - Arguments.emplace_back(); - MCAsmMacroParameter &P = Parameters.back(); - MCAsmMacroArgument &A = Arguments.back(); + MCAsmMacroParameter P; + MCAsmMacroArgument A; P.Name = Var.Name; P.Required = true; A.push_back(AsmToken(AsmToken::String, Var.TextValue)); + + Parameters.push_back(std::move(P)); + Arguments.push_back(std::move(A)); } } MacroLikeBodies.emplace_back(StringRef(), Body, Parameters); @@ -7520,6 +7637,100 @@ return false; } +void MasmParser::initializeBuiltinSymbolMap() { + // Numeric built-ins (supported in all versions) + BuiltinSymbolMap["@version"] = BI_VERSION; + BuiltinSymbolMap["@line"] = BI_LINE; + + // Text built-ins (supported in all versions) + BuiltinSymbolMap["@date"] = BI_DATE; + BuiltinSymbolMap["@time"] = BI_TIME; + BuiltinSymbolMap["@filecur"] = BI_FILECUR; + BuiltinSymbolMap["@filename"] = BI_FILENAME; + BuiltinSymbolMap["@curseg"] = BI_CURSEG; + + // Some built-ins exist only for MASM32 (32-bit x86) + if (getContext().getSubtargetInfo()->getTargetTriple().getArch() == + Triple::x86) { + // Numeric built-ins + // BuiltinSymbolMap["@cpu"] = BI_CPU; + // BuiltinSymbolMap["@interface"] = BI_INTERFACE; + // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE; + // BuiltinSymbolMap["@codesize"] = BI_CODESIZE; + // BuiltinSymbolMap["@datasize"] = BI_DATASIZE; + // BuiltinSymbolMap["@model"] = BI_MODEL; + + // Text built-ins + // BuiltinSymbolMap["@code"] = BI_CODE; + // BuiltinSymbolMap["@data"] = BI_DATA; + // BuiltinSymbolMap["@fardata?"] = BI_FARDATA; + // BuiltinSymbolMap["@stack"] = BI_STACK; + } +} + +const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol, + SMLoc StartLoc) { + switch (Symbol) { + default: + return nullptr; + case BI_VERSION: + // Match a recent version of ML.EXE. + return MCConstantExpr::create(1427, getContext()); + case BI_LINE: { + int64_t Line; + if (ActiveMacros.empty()) + Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer); + else + Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc, + ActiveMacros.front()->ExitBuffer); + return MCConstantExpr::create(Line, getContext()); + } + } + llvm_unreachable("unhandled built-in symbol"); +} + +llvm::Optional +MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) { + switch (Symbol) { + default: + return {}; + case BI_DATE: { + // Current local date, formatted MM/DD/YY + time_t TT = time(nullptr); + struct tm *TM = localtime(&TT); + SmallString<32> TmpBuffer; + llvm::raw_svector_ostream TmpStream(TmpBuffer); + TmpStream << llvm::format("%02d/%02d/%02d", TM->tm_mon + 1, TM->tm_mday, + TM->tm_year % 100); + return TmpStream.str().str(); + } + case BI_TIME: { + // Current local time, formatted HH:MM:SS + time_t TT = time(nullptr); + struct tm *TM = localtime(&TT); + SmallString<32> TmpBuffer; + llvm::raw_svector_ostream TmpStream(TmpBuffer); + TmpStream << llvm::format("%02d:%02d:%02d", TM->tm_hour, TM->tm_min, + TM->tm_sec); + return TmpStream.str().str(); + } + case BI_FILECUR: + return SrcMgr + .getMemoryBuffer( + ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer) + ->getBufferIdentifier() + .str(); + case BI_FILENAME: + return sys::path::stem(sys::path::filename( + SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID()) + ->getBufferIdentifier())) + .upper(); + case BI_CURSEG: + return getStreamer().getCurrentSectionOnly()->getName().str(); + } + llvm_unreachable("unhandled built-in symbol"); +} + /// Create an MCAsmParser instance. MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C, MCStreamer &Out, const MCAsmInfo &MAI, diff --git a/llvm/test/tools/llvm-ml/builtin_symbols.asm b/llvm/test/tools/llvm-ml/builtin_symbols.asm new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-ml/builtin_symbols.asm @@ -0,0 +1,57 @@ +; RUN: llvm-ml -filetype=s %s /I %S /Fo /dev/null 2>&1 | FileCheck %s + +.code + +version_val TEXTEQU %@Version + +ECHO t1: +%ECHO @Version = version_val +; CHECK-LABEL: t1: +; CHECK-NEXT: 1427 + +ECHO + +ECHO t2: +if @Version gt 510 +ECHO @Version gt 510 +endif +; CHECK-LABEL: t2: +; CHECK-NEXT: @Version gt 510 + +ECHO + +ECHO t3: +if @Version le 510 +ECHO le 510 +endif +; CHECK-LABEL: t3: +; CHECK-NOT: @Version le 510 + +ECHO + +line_val TEXTEQU %@Line + +ECHO t4: +%ECHO @Line = line_val +; CHECK-LABEL: t4: +; CHECK-NEXT: @Line = [[# @LINE - 5]] + +ECHO t5: +include builtin_symbols_t5.inc +; CHECK-LABEL: t5: +; CHECK: FileCur = +; CHECK-SAME: builtin_symbols_t5.inc +; CHECK: FileName = +; CHECK-SAME: BUILTIN_SYMBOLS +; CHECK-NOT: T5 + +ECHO t6: +%ECHO Date = @Date +%ECHO Time = @Time + +; CHECK-LABEL: t6: +; CHECK: Date = {{([[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{2})}} +; CHECK-NOT: {{[[:digit:]]}} +; CHECK: Time = {{([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})}} + +end diff --git a/llvm/test/tools/llvm-ml/builtin_symbols_t5.inc b/llvm/test/tools/llvm-ml/builtin_symbols_t5.inc new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-ml/builtin_symbols_t5.inc @@ -0,0 +1,2 @@ +%echo FileCur = @FileCur +%echo FileName = @FileName diff --git a/llvm/test/tools/llvm-ml/variable_redef_errors.asm b/llvm/test/tools/llvm-ml/variable_redef_errors.asm --- a/llvm/test/tools/llvm-ml/variable_redef_errors.asm +++ b/llvm/test/tools/llvm-ml/variable_redef_errors.asm @@ -7,6 +7,9 @@ ; CHECK: :[[# @LINE + 1]]:21: error: invalid variable redefinition equated_number equ 4 +; CHECK: :[[# @LINE + 1]]:1: error: cannot redefine a built-in symbol +@Line equ 5 + .code end