diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -500,12 +500,23 @@
   /// Codeview def_range types parsed by this class.
   StringMap<CVDefRangeType> CVDefRangeTypeMap;
 
   // ".ascii", ".asciz", ".string"
   bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
-  bool parseDirectiveValue(StringRef IDVal,
-                           unsigned Size); // "byte", "word", ...
-  bool parseDirectiveRealValue(StringRef IDVal,
-                               const fltSemantics &); // "real4", ...
+
+  // "byte", "word", ...
+  bool parseScalarInstList(unsigned Size,
+                           SmallVectorImpl<const MCExpr *> &Values);
+  bool parseDirectiveValue(StringRef IDVal, unsigned Size);
+  bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name,
+                                SMLoc NameLoc);
+
+  // "real4", "real8"
+  bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics);
+  bool parseRealInstList(const fltSemantics &Semantics,
+                         SmallVectorImpl<APInt> &ValuesAsInt);
+  bool parseDirectiveNamedRealValue(StringRef IDVal,
+                                    const fltSemantics &Semantics,
+                                    StringRef Name, SMLoc NameLoc);
 
   // "=", "equ", "textequ"
   bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
@@ -1960,6 +1971,33 @@
   case DK_TEXTEQU:
     Lex();
     return parseDirectiveEquate(nextVal, IDVal, DirKind);
+  case DK_BYTE:
+  case DK_DB:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
+  case DK_WORD:
+  case DK_DW:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
+  case DK_DWORD:
+  case DK_DD:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
+  case DK_FWORD:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
+  case DK_QWORD:
+  case DK_DQ:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
+  case DK_REAL4:
+    Lex();
+    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal,
+                                        IDLoc);
+  case DK_REAL8:
+    Lex();
+    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal,
+                                        IDLoc);
   }
 
   // __asm _emit or __asm __emit
@@ -2796,31 +2834,117 @@
   return false;
 }
 
+/// parseScalarInstList
+///  ::= initializer (, initializer)*
+/// where an initializer is a string, an expression, or
+/// "count dup ( initializer-list )".
+///
+/// Appends one value to \p Values for every \p Size-byte element that the
+/// list initializes; nothing is emitted here, and the caller is responsible
+/// for checking that a section is active. A string yields one value per
+/// character when \p Size is 1, and otherwise a single value with the
+/// characters packed most-significant-first. Returns true on error.
+bool MasmParser::parseScalarInstList(unsigned Size,
+                                     SmallVectorImpl<const MCExpr *> &Values) {
+  do {
+    if (getTok().is(AsmToken::String)) {
+      StringRef Value = getTok().getStringContents();
+      if (Size == 1) {
+        // Treat each character as an initializer.
+        for (const unsigned char CharVal : Value.bytes())
+          Values.push_back(MCConstantExpr::create(CharVal, getContext()));
+      } else {
+        // Treat the string as an initial value in big-endian representation.
+        if (Value.size() > Size)
+          return Error(getTok().getLoc(), "out of range literal value");
+
+        uint64_t IntValue = 0;
+        for (const unsigned char CharVal : Value.bytes())
+          IntValue = (IntValue << 8) | CharVal;
+        Values.push_back(MCConstantExpr::create(IntValue, getContext()));
+      }
+      Lex();
+    } else {
+      const MCExpr *Value;
+      if (parseExpression(Value))
+        return true;
+      if (getTok().is(AsmToken::Identifier) &&
+          getTok().getString().equals_lower("dup")) {
+        Lex(); // eat 'dup'
+        const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+        if (!MCE)
+          return Error(Value->getLoc(),
+                       "cannot repeat value a non-constant number of times");
+        const int64_t Repetitions = MCE->getValue();
+        if (Repetitions < 0)
+          return Error(Value->getLoc(),
+                       "cannot repeat value a negative number of times");
+
+        SmallVector<const MCExpr *, 1> DuplicatedValues;
+        if (parseToken(AsmToken::LParen,
+                       "parentheses required for 'dup' contents") ||
+            parseScalarInstList(Size, DuplicatedValues) ||
+            parseToken(AsmToken::RParen, "unmatched parentheses"))
+          return true;
+
+        // The counter is as wide as Repetitions so that it cannot overflow.
+        for (int64_t i = 0; i < Repetitions; ++i)
+          Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
+      } else {
+        Values.push_back(Value);
+      }
+    }
+
+    // Continue if we see a comma. (Also, allow line continuation.)
+  } while (parseOptionalToken(AsmToken::Comma) &&
+           (getTok().isNot(AsmToken::EndOfStatement) ||
+            !parseToken(AsmToken::EndOfStatement)));
+
+  return false;
+}
+
 /// parseDirectiveValue
 ///  ::= (byte | word | ... ) [ expression (, expression)* ]
 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
-  auto parseOp = [&]() -> bool {
-    const MCExpr *Value;
-    SMLoc ExprLoc = getLexer().getLoc();
-    if (checkForValidSection() || parseExpression(Value))
-      return true;
+  SmallVector<const MCExpr *, 1> Values;
+  if (checkForValidSection() || parseScalarInstList(Size, Values))
+    return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+
+  for (const MCExpr *Value : Values) {
     // Special case constant expressions to match code generator.
     if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
       assert(Size <= 8 && "Invalid size");
       int64_t IntValue = MCE->getValue();
       if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
-        return Error(ExprLoc, "out of range literal value");
+        return Error(MCE->getLoc(), "out of range literal value");
       getStreamer().emitIntValue(IntValue, Size);
-    } else
-      getStreamer().emitValue(Value, Size, ExprLoc);
-    return false;
-  };
-
-  if (parseMany(parseOp))
-    return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+    } else if (const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
+               MSE && MSE->getSymbol().getName() == "?") {
+      // ? initializer; treat as 0.
+      getStreamer().emitIntValue(0, Size);
+    } else {
+      getStreamer().emitValue(Value, Size, Value->getLoc());
+    }
+  }
   return false;
 }
 
+/// parseDirectiveNamedValue
+///  ::= name (byte | word | ... ) [ expression (, expression)* ]
+///
+/// Emits a label for \p Name, passing \p NameLoc as its source location, and
+/// then parses and emits the values.
+bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size,
+                                          StringRef Name, SMLoc NameLoc) {
+  // Make sure a section is active before the label is emitted.
+  if (checkForValidSection())
+    return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+
+  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+  getStreamer().emitLabel(Sym, NameLoc);
+  return parseDirectiveValue(IDVal, Size);
+}
+
 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
   if (Asm.getTok().isNot(AsmToken::Integer) &&
       Asm.getTok().isNot(AsmToken::BigNum))
@@ -2881,24 +3005,87 @@
   return false;
 }
 
+/// parseRealInstList
+///  ::= initializer (, initializer)*
+/// where an initializer is a real value or "count dup ( initializer-list )".
+///
+/// Appends the APInt produced by parseRealValue for every initialized element
+/// to \p ValuesAsInt; nothing is emitted here. Returns true on error.
+bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
+                                   SmallVectorImpl<APInt> &ValuesAsInt) {
+  do {
+    // A repeat count is only recognized when directly followed by 'dup'.
+    const AsmToken NextTok = Lexer.peekTok();
+    if (NextTok.is(AsmToken::Identifier) &&
+        NextTok.getString().equals_lower("dup")) {
+      const MCExpr *Value;
+      if (parseExpression(Value) || parseToken(AsmToken::Identifier))
+        return true;
+      const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+      if (!MCE)
+        return Error(Value->getLoc(),
+                     "cannot repeat value a non-constant number of times");
+      const int64_t Repetitions = MCE->getValue();
+      if (Repetitions < 0)
+        return Error(Value->getLoc(),
+                     "cannot repeat value a negative number of times");
+
+      SmallVector<APInt, 1> DuplicatedValues;
+      if (parseToken(AsmToken::LParen,
+                     "parentheses required for 'dup' contents") ||
+          parseRealInstList(Semantics, DuplicatedValues) ||
+          parseToken(AsmToken::RParen, "unmatched parentheses"))
+        return true;
+
+      // The counter is as wide as Repetitions so that it cannot overflow.
+      for (int64_t i = 0; i < Repetitions; ++i)
+        ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
+    } else {
+      APInt AsInt;
+      if (parseRealValue(Semantics, AsInt))
+        return true;
+      ValuesAsInt.push_back(AsInt);
+    }
+    // Continue if we see a comma. (Also, allow line continuation.)
+  } while (parseOptionalToken(AsmToken::Comma) &&
+           (getTok().isNot(AsmToken::EndOfStatement) ||
+            !parseToken(AsmToken::EndOfStatement)));
+
+  return false;
+}
+
 /// parseDirectiveRealValue
 ///  ::= (real4 | real8) [ expression (, expression)* ]
 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
                                          const fltSemantics &Semantics) {
-  auto parseOp = [&]() -> bool {
-    APInt AsInt;
-    if (checkForValidSection() || parseRealValue(Semantics, AsInt))
-      return true;
-    getStreamer().emitIntValue(AsInt.getLimitedValue(),
-                               AsInt.getBitWidth() / 8);
-    return false;
-  };
-
-  if (parseMany(parseOp))
+  SmallVector<APInt, 1> ValuesAsInt;
+  if (checkForValidSection() || parseRealInstList(Semantics, ValuesAsInt))
     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+
+  for (const APInt &AsInt : ValuesAsInt) {
+    getStreamer().emitIntValue(AsInt.getLimitedValue(),
+                               AsInt.getBitWidth() / 8);
+  }
   return false;
 }
 
+/// parseDirectiveNamedRealValue
+///  ::= name (real4 | real8) [ expression (, expression)* ]
+///
+/// Emits a label for \p Name, passing \p NameLoc as its source location, and
+/// then parses and emits the values.
+bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal,
+                                              const fltSemantics &Semantics,
+                                              StringRef Name, SMLoc NameLoc) {
+  // Make sure a section is active before the label is emitted.
+  if (checkForValidSection())
+    return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+
+  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+  getStreamer().emitLabel(Sym, NameLoc);
+  return parseDirectiveRealValue(IDVal, Semantics);
+}
+
 /// parseDirectiveOrg
 ///  ::= .org expression [ , expression ]
 bool MasmParser::parseDirectiveOrg() {
diff --git a/llvm/test/tools/llvm-ml/basic_data.test b/llvm/test/tools/llvm-ml/basic_data.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/basic_data.test
@@ -0,0 +1,38 @@
+# RUN: llvm-ml -filetype=asm %s | FileCheck %s
+
+.data
+BYTE 2, 4, 6, 8
+; CHECK: .data
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .byte 8
+
+BYTE 2 dup (1, 2 dup (2)),
+     3
+; CHECK: .byte 1
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 3
+
+REAL4 1, 0
+; CHECK: .long 1065353216
+; CHECK-NEXT: .long 0
+
+REAL4 2 DUP (2.5, 2 dup (0)),
+      4
+; CHECK: .long 1075838976
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1075838976
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1082130432
+
+.code
+BYTE 5
+; CHECK: .text
+; CHECK-NEXT: .byte 5