diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -50,6 +50,8 @@
   bool AllowAtInIdentifier;
   bool IsAtStartOfStatement = true;
   bool LexMasmIntegers = false;
+  bool UseMasmDefaultRadix = false;
+  unsigned DefaultRadix = 10;
   AsmCommentConsumer *CommentConsumer = nullptr;
 
   MCAsmLexer();
@@ -147,9 +149,16 @@
     this->CommentConsumer = CommentConsumer;
   }
 
-  /// Set whether to lex masm-style binary and hex literals. They look like
-  /// 0b1101 and 0ABCh respectively.
+  /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
+  /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
   void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
+
+  /// Set whether to use masm-style default-radix integer literals. If disabled,
+  /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
+  void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
+
+  unsigned getMasmDefaultRadix() const { return DefaultRadix; }
+  void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <cassert>
@@ -271,13 +272,37 @@
   return DefaultRadix;
 }
 
-static AsmToken intToken(StringRef Ref, APInt &Value)
-{
+/// Skip the run of digits valid in \p DefaultRadix that starts at \p CurPtr and
+/// return a pointer to the first character that is not such a digit.
+static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
+  while (hexDigitValue(*CurPtr) < DefaultRadix) {
+    ++CurPtr;
+  }
+  return CurPtr;
+}
+
+static AsmToken intToken(StringRef Ref, APInt &Value) {
   if (Value.isIntN(64))
     return AsmToken(AsmToken::Integer, Ref, Value);
   return AsmToken(AsmToken::BigNum, Ref, Value);
 }
 
+/// Return the name of \p Radix as spelled in "invalid ... number" diagnostics.
+static std::string radixName(unsigned Radix) {
+  switch (Radix) {
+  case 2:
+    return "binary";
+  case 8:
+    return "octal";
+  case 10:
+    return "decimal";
+  case 16:
+    return "hexadecimal";
+  default:
+    return "base-" + std::to_string(Radix);
+  }
+}
+
 /// LexDigit: First character is [0-9].
 ///   Local Label: [0-9][:]
 ///   Forward/Backward Label: [0-9][fb]
@@ -286,16 +311,46 @@
 ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
 ///   Decimal integer: [1-9][0-9]*
 AsmToken AsmLexer::LexDigit() {
-  // MASM-flavor binary integer: [01]+[bB]
+  // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 12, [bByY])
+  // MASM-flavor octal integer: [0-7]+[oOqQ]
+  // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 14, [dDtT])
   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
   if (LexMasmIntegers && isdigit(CurPtr[-1])) {
-    const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
-                                   CurPtr - 1 : nullptr;
+    const char *FirstNonBinary =
+        (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
+    const char *FirstNonDecimal =
+        (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
     const char *OldCurPtr = CurPtr;
     while (isHexDigit(*CurPtr)) {
-      if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
-        FirstNonBinary = CurPtr;
+      switch (*CurPtr) {
+      default:
+        if (!FirstNonDecimal) {
+          FirstNonDecimal = CurPtr;
+        }
+        LLVM_FALLTHROUGH;
+      case '9':
+      case '8':
+      case '7':
+      case '6':
+      case '5':
+      case '4':
+      case '3':
+      case '2':
+        if (!FirstNonBinary) {
+          FirstNonBinary = CurPtr;
+        }
+        break;
+      case '1':
+      case '0':
+        break;
+      }
+      ++CurPtr;
+    }
+    if (*CurPtr == '.') {
+      // MASM float literals (other than hex floats) always contain a ".", and
+      // are always written in decimal.
       ++CurPtr;
+      return LexFloatLiteral();
     }
 
     unsigned Radix = 0;
@@ -303,28 +358,61 @@
       // hexadecimal number
       ++CurPtr;
       Radix = 16;
+    } else if (*CurPtr == 't' || *CurPtr == 'T') {
+      // decimal number
+      ++CurPtr;
+      Radix = 10;
+    } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
+               *CurPtr == 'Q') {
+      // octal number
+      ++CurPtr;
+      Radix = 8;
+    } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
+      // binary number
+      ++CurPtr;
+      Radix = 2;
+    } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
+               DefaultRadix < 14 &&
+               (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
+      Radix = 10;
     } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
-               (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
+               DefaultRadix < 12 &&
+               (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
       Radix = 2;
+    }
 
-    if (Radix == 2 || Radix == 16) {
+    if (Radix) {
       StringRef Result(TokStart, CurPtr - TokStart);
       APInt Value(128, 0, true);
 
       if (Result.drop_back().getAsInteger(Radix, Value))
-        return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
-                             "invalid hexdecimal number");
+        return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
 
       // MSVC accepts and ignores type suffices on integer literals.
       SkipIgnoredIntegerSuffix(CurPtr);
 
       return intToken(Result, Value);
-   }
+    }
 
-    // octal/decimal integers, or floating point numbers, fall through
+    // default-radix integers, or floating point numbers, fall through
     CurPtr = OldCurPtr;
   }
 
+  // MASM default-radix integers: [0-9a-fA-F]+
+  // (All other integer literals have a radix specifier.)
+  if (LexMasmIntegers && UseMasmDefaultRadix) {
+    CurPtr = findLastDigit(CurPtr, 16);
+    StringRef Result(TokStart, CurPtr - TokStart);
+
+    APInt Value(128, 0, true);
+    if (Result.getAsInteger(DefaultRadix, Value)) {
+      return ReturnError(TokStart,
+                         "invalid " + radixName(DefaultRadix) + " number");
+    }
+
+    return intToken(Result, Value);
+  }
+
   // Decimal integer: [1-9][0-9]*
   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
@@ -339,13 +427,9 @@
     StringRef Result(TokStart, CurPtr - TokStart);
 
     APInt Value(128, 0, true);
-    if (Result.getAsInteger(Radix, Value))
-      return ReturnError(TokStart, !isHex ? "invalid decimal number" :
-                           "invalid hexdecimal number");
-
-    // Consume the [hH].
-    if (LexMasmIntegers && Radix == 16)
-      ++CurPtr;
+    if (Result.getAsInteger(Radix, Value)) {
+      return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
+    }
 
     // The darwin/x86 (and x86-64) assembler accepts and ignores type
     // suffices on integer literals.
@@ -416,11 +500,9 @@
   // Either octal or hexadecimal.
   APInt Value(128, 0, true);
   unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
-  bool isHex = Radix == 16;
   StringRef Result(TokStart, CurPtr - TokStart);
   if (Result.getAsInteger(Radix, Value))
-    return ReturnError(TokStart, !isHex ? "invalid octal number" :
-                       "invalid hexdecimal number");
+    return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
 
   // Consume the [hH].
   if (Radix == 16)
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -132,7 +132,6 @@
     // option
     // popcontext
     // pushcontext
-    // .radix
     // .safeseh
 
     // Procedure directives
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -732,6 +732,7 @@
     DK_SAVEREG,
     DK_SAVEXMM128,
     DK_SETFRAME,
+    DK_RADIX,
   };
 
   /// Maps directive name --> DirectiveKind enum, for directives parsed by this
@@ -964,6 +965,9 @@
   // ".erre" or ".errnz", depending on ExpectZero.
   bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
 
+  // ".radix"
+  bool parseDirectiveRadix(SMLoc DirectiveLoc);
+
   // "echo"
   bool parseDirectiveEcho();
 
@@ -2284,6 +2288,8 @@
       return parseDirectiveErrorIfe(IDLoc, true);
     case DK_ERRNZ:
       return parseDirectiveErrorIfe(IDLoc, false);
+    case DK_RADIX:
+      return parseDirectiveRadix(IDLoc);
     case DK_ECHO:
       return parseDirectiveEcho();
     }
@@ -6343,6 +6349,7 @@
   DirectiveKindMap[".savereg"] = DK_SAVEREG;
   DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
   DirectiveKindMap[".setframe"] = DK_SETFRAME;
+  DirectiveKindMap[".radix"] = DK_RADIX;
   // DirectiveKindMap[".altmacro"] = DK_ALTMACRO;
   // DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO;
   DirectiveKindMap["db"] = DK_DB;
@@ -6584,6 +6591,23 @@
   return false;
 }
 
+bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
+  const SMLoc Loc = getLexer().getLoc();
+  // The operand is always parsed as decimal, whatever radix is in effect.
+  StringRef RadixString = parseStringToEndOfStatement().trim();
+  unsigned Radix;
+  if (RadixString.getAsInteger(10, Radix)) {
+    return Error(Loc,
+                 "radix must be a decimal number in the range 2 to 16; was " +
+                     RadixString);
+  }
+  if (Radix < 2 || Radix > 16)
+    return Error(Loc, "radix must be in the range 2 to 16; was " +
+                          std::to_string(Radix));
+  getLexer().setMasmDefaultRadix(Radix);
+  return false;
+}
+
 bool MasmParser::parseDirectiveEcho() {
   StringRef Message = parseStringToEndOfStatement();
   Lex(); // eat end of statement
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1662,6 +1662,8 @@
       if ((Done = SM.isValidEndState()))
         break;
       return Error(Tok.getLoc(), "unknown token in expression");
+    case AsmToken::Error:
+      return Error(getLexer().getErrLoc(), getLexer().getErr());
     case AsmToken::EndOfStatement:
       Done = true;
       break;
@@ -2453,21 +2455,26 @@
         // Parse memory broadcasting ({1to<NUM>}).
         if (getLexer().getTok().getIntVal() != 1)
           return TokError("Expected 1to<NUM> at this point");
-        Parser.Lex();  // Eat "1" of 1to8
-        if (!getLexer().is(AsmToken::Identifier) ||
-            !getLexer().getTok().getIdentifier().startswith("to"))
+        StringRef Prefix = getLexer().getTok().getString();
+        Parser.Lex(); // Eat first token of 1to8
+        if (!getLexer().is(AsmToken::Identifier))
           return TokError("Expected 1to<NUM> at this point");
         // Recognize only reasonable suffixes.
+        SmallVector<char, 5> BroadcastVector;
+        StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
+                                        .toStringRef(BroadcastVector);
+        if (!BroadcastString.startswith("1to"))
+          return TokError("Expected 1to<NUM> at this point");
         const char *BroadcastPrimitive =
-          StringSwitch<const char*>(getLexer().getTok().getIdentifier())
-            .Case("to2", "{1to2}")
-            .Case("to4", "{1to4}")
-            .Case("to8", "{1to8}")
-            .Case("to16", "{1to16}")
-            .Default(nullptr);
+            StringSwitch<const char *>(BroadcastString)
+                .Case("1to2", "{1to2}")
+                .Case("1to4", "{1to4}")
+                .Case("1to8", "{1to8}")
+                .Case("1to16", "{1to16}")
+                .Default(nullptr);
         if (!BroadcastPrimitive)
           return TokError("Invalid memory broadcast primitive.");
-        Parser.Lex();  // Eat "toN" of 1toN
+        Parser.Lex(); // Eat trailing token of 1toN
         if (!getLexer().is(AsmToken::RCurly))
           return TokError("Expected } at this point");
         Parser.Lex();  // Eat "}"
diff --git a/llvm/test/tools/llvm-ml/radix.test b/llvm/test/tools/llvm-ml/radix.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/radix.test
@@ -0,0 +1,97 @@
+# RUN: llvm-ml -filetype=asm %s | FileCheck %s
+
+.code
+
+t1:
+mov eax, 100b
+mov eax, 100y
+
+; CHECK-LABEL: t1:
+; CHECK-NEXT: mov eax, 4
+; CHECK-NEXT: mov eax, 4
+
+t2:
+mov eax, 100o
+mov eax, 100q
+
+; CHECK-LABEL: t2:
+; CHECK-NEXT: mov eax, 64
+; CHECK-NEXT: mov eax, 64
+
+t3:
+mov eax, 100d
+mov eax, 100t
+
+; CHECK-LABEL: t3:
+; CHECK-NEXT: mov eax, 100
+; CHECK-NEXT: mov eax, 100
+
+t4:
+mov eax, 100h
+
+; CHECK-LABEL: t4:
+; CHECK-NEXT: mov eax, 256
+
+t5:
+mov eax, 100
+.radix 2
+mov eax, 100
+.radix 16
+mov eax, 100
+.radix 10
+mov eax, 100
+
+; CHECK-LABEL: t5:
+; CHECK: mov eax, 100
+; CHECK: mov eax, 4
+; CHECK: mov eax, 256
+; CHECK: mov eax, 100
+
+t6:
+.radix 9
+mov eax, 100
+.radix 10
+
+; CHECK-LABEL: t6:
+; CHECK: mov eax, 81
+
+t7:
+.radix 12
+mov eax, 100b
+mov eax, 100y
+.radix 10
+
+; CHECK-LABEL: t7:
+; CHECK: mov eax, 1739
+; CHECK: mov eax, 4
+
+t8:
+.radix 16
+mov eax, 100d
+mov eax, 100t
+.radix 10
+
+; CHECK-LABEL: t8:
+; CHECK: mov eax, 4109
+; CHECK: mov eax, 100
+
+t9:
+.radix 12
+mov eax, 102b
+.radix 16
+mov eax, 10fd
+.radix 10
+
+; CHECK-LABEL: t9:
+; CHECK: mov eax, 1763
+; CHECK: mov eax, 4349
+
+t10:
+.radix 16
+mov eax, 1e1
+.radix 10
+
+; CHECK-LABEL: t10:
+; CHECK: mov eax, 481
+
+END
diff --git a/llvm/test/tools/llvm-ml/radix_errors.test b/llvm/test/tools/llvm-ml/radix_errors.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/radix_errors.test
@@ -0,0 +1,60 @@
+; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s
+
+.code
+
+t1:
+mov eax, 120b
+mov eax, 120y
+.radix 11
+mov eax, 120b
+mov eax, 120y
+.radix 10
+
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid binary number
+; CHECK: error: invalid base-11 number
+; CHECK: error: invalid binary number
+
+t2:
+mov eax, 190o
+mov eax, 190q
+.radix 13
+mov eax, 190o
+mov eax, 190q
+.radix 10
+
+; CHECK: error: invalid octal number
+; CHECK: error: invalid octal number
+; CHECK: error: invalid octal number
+; CHECK: error: invalid octal number
+
+t3:
+mov eax, 1f0d
+mov eax, 1f0t
+.radix 13
+mov eax, 1f0d
+mov eax, 1f0t
+.radix 10
+
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid base-13 number
+; CHECK: error: invalid decimal number
+
+t4:
+mov eax, 10e
+.radix 16
+.radix 10
+mov eax, 10e
+
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid decimal number
+
+t5:
+.radix 9
+mov eax, 9
+.radix 10
+
+; CHECK: error: invalid base-9 number
+
+END
diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp
--- a/llvm/tools/llvm-ml/llvm-ml.cpp
+++ b/llvm/tools/llvm-ml/llvm-ml.cpp
@@ -176,6 +176,7 @@
   AsmLexer Lexer(MAI);
   Lexer.setBuffer(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer());
   Lexer.setLexMasmIntegers(true);
+  Lexer.useMasmDefaultRadix(true);
 
   bool Error = false;
   while (Lexer.Lex().isNot(AsmToken::Eof)) {
@@ -206,6 +207,7 @@
   Parser->setShowParsedOperands(ShowInstOperands);
   Parser->setTargetParser(*TAP);
   Parser->getLexer().setLexMasmIntegers(true);
+  Parser->getLexer().useMasmDefaultRadix(true);
 
   int Res = Parser->Run(/*NoInitialTextSection=*/true);
 