diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -1332,6 +1332,8 @@ /// primaryexpr ::= number /// primaryexpr ::= '.' /// primaryexpr ::= ~,+,-,'not' primaryexpr +/// primaryexpr ::= string +/// (a string is interpreted as a 64-bit number in big-endian base-256) bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, AsmTypeInfo *TypeInfo) { SMLoc FirstTokenLoc = getLexer().getLoc(); @@ -1350,7 +1352,6 @@ return false; case AsmToken::Dollar: case AsmToken::At: - case AsmToken::String: case AsmToken::Identifier: { StringRef Identifier; if (parseIdentifier(Identifier)) { @@ -1517,6 +1518,20 @@ } return false; } + case AsmToken::String: { + // MASM strings (used as constants) are interpreted as big-endian base-256. + SMLoc ValueLoc = getTok().getLoc(); + std::string Value; + if (parseEscapedString(Value)) + return true; + if (Value.size() > 8) + return Error(ValueLoc, "literal value out of range"); + uint64_t IntValue = 0; + for (const unsigned char CharVal : Value) + IntValue = (IntValue << 8) | CharVal; + Res = MCConstantExpr::create(IntValue, getContext()); + return false; + } case AsmToken::Real: { APFloat RealVal(APFloat::IEEEdouble(), getTok().getString()); uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); @@ -3168,28 +3183,17 @@ bool MasmParser::parseScalarInitializer(unsigned Size, SmallVectorImpl &Values, unsigned StringPadLength) { - if (getTok().is(AsmToken::String)) { + if (Size == 1 && getTok().is(AsmToken::String)) { std::string Value; if (parseEscapedString(Value)) return true; - if (Size == 1) { - // Treat each character as an initializer. - for (const char CharVal : Value) - Values.push_back(MCConstantExpr::create(CharVal, getContext())); - - // Pad the string with spaces to the specified length. - for (size_t i = Value.size(); i < StringPadLength; ++i) - Values.push_back(MCConstantExpr::create(' ', getContext())); - } else { - // Treat the string as an initial value in big-endian representation. - if (Value.size() > Size) - return Error(getTok().getLoc(), "out of range literal value"); - - uint64_t IntValue = 0; - for (const unsigned char CharVal : Value) - IntValue = (IntValue << 8) | CharVal; - Values.push_back(MCConstantExpr::create(IntValue, getContext())); - } + // Treat each character as an initializer. + for (const unsigned char CharVal : Value) + Values.push_back(MCConstantExpr::create(CharVal, getContext())); + + // Pad the string with spaces to the specified length. + for (size_t i = Value.size(); i < StringPadLength; ++i) + Values.push_back(MCConstantExpr::create(' ', getContext())); } else { const MCExpr *Value; if (parseExpression(Value)) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1693,20 +1693,25 @@ return Error(Tok.getLoc(), "unknown token in expression"); } LLVM_FALLTHROUGH; + case AsmToken::String: { + if (Parser.isParsingMasm()) { + // MASM parsers handle strings in expressions as constants. + SMLoc ValueLoc = Tok.getLoc(); + int64_t Res; + const MCExpr *Val; + if (Parser.parsePrimaryExpr(Val, End, nullptr)) + return true; + UpdateLocLex = false; + if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr())) + return Error(ValueLoc, "expected absolute value"); + if (SM.onInteger(Res, ErrMsg)) + return Error(ValueLoc, ErrMsg); + break; + } + LLVM_FALLTHROUGH; + } case AsmToken::At: - case AsmToken::String: case AsmToken::Identifier: { - if (Parser.isParsingMasm() && Tok.is(AsmToken::String)) { - // Single-character strings should be treated as integer constants. This - // includes MASM escapes for quotes. - char Quote = Tok.getString().front(); - StringRef Contents = Tok.getStringContents(); - if (Contents.size() == 1 || Contents == std::string(2, Quote)) { - if (SM.onInteger(Contents.front(), ErrMsg)) - return Error(Tok.getLoc(), ErrMsg); - break; - } - } SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); UpdateLocLex = false; diff --git a/llvm/test/tools/llvm-ml/strings.test b/llvm/test/tools/llvm-ml/strings.test --- a/llvm/test/tools/llvm-ml/strings.test +++ b/llvm/test/tools/llvm-ml/strings.test @@ -119,4 +119,25 @@ ret dq_char_test ENDP +string_constant_test PROC +; CHECK-LABEL: string_constant_test: + + mov eax, 'ab' + mov eax, "ab" +; CHECK: mov eax, 24930 +; CHECK: mov eax, 24930 + + mov eax, "abc" + mov eax, 'abc' +; CHECK: mov eax, 6382179 +; CHECK: mov eax, 6382179 + + mov eax, "abc""" + mov eax, 'abc''' +; CHECK: mov eax, 1633837858 +; CHECK: mov eax, 1633837863 + + ret +string_constant_test ENDP + end diff --git a/llvm/test/tools/llvm-ml/strings_errors.test b/llvm/test/tools/llvm-ml/strings_errors.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-ml/strings_errors.test @@ -0,0 +1,15 @@ +; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --implicit-check-not=error: + +.code + +oversize_string_test PROC + + mov rax, "abcdefghi" + mov rax, 'abcdefghi' +; CHECK: error: literal value out of range +; CHECK: error: literal value out of range + + ret +oversize_string_test ENDP + +end