Skip to content

Commit 8601ac1

Browse files
committed Aug 2, 2016
[MC] Fix Intel Operand assembly parsing for .set ids
Recommitting after fixing an overaggressive fast-path return in parsing. Fix Intel-syntax special-case identifier operands that refer to a constant (e.g. .set <ID> n) so that they are interpreted as immediates, not memory operands, during parsing. An associated commit to fix the clang test will be committed shortly. Reviewers: rnk Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D22585 llvm-svn: 277489
1 parent ca2f9d1 commit 8601ac1

File tree

3 files changed

+98
-116
lines changed

3 files changed

+98
-116
lines changed
 

‎llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

+80-116
Original file line numberDiff line numberDiff line change
@@ -698,14 +698,11 @@ class X86AsmParser : public MCTargetAsmParser {
698698
std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
699699
std::unique_ptr<X86Operand>
700700
ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
701-
std::unique_ptr<X86Operand>
702-
ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
703701
std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
704702
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
705-
std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
706-
SMLoc Start,
707-
int64_t ImmDisp,
708-
unsigned Size);
703+
std::unique_ptr<X86Operand>
704+
ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
705+
bool isSymbol, unsigned Size);
709706
bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
710707
InlineAsmIdentifierInfo &Info,
711708
bool IsUnevaluatedOperand, SMLoc &End);
@@ -1271,7 +1268,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
12711268

12721269
// The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
12731270
// identifier. Don't try to parse it as a register.
1274-
if (Tok.getString().startswith("."))
1271+
if (PrevTK != AsmToken::Error && Tok.getString().startswith("."))
12751272
break;
12761273

12771274
// If we're parsing an immediate expression, we don't expect a '['.
@@ -1386,7 +1383,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
13861383

13871384
std::unique_ptr<X86Operand>
13881385
X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1389-
int64_t ImmDisp, unsigned Size) {
1386+
int64_t ImmDisp, bool isSymbol,
1387+
unsigned Size) {
13901388
MCAsmParser &Parser = getParser();
13911389
const AsmToken &Tok = Parser.getTok();
13921390
SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
@@ -1436,6 +1434,21 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
14361434
Disp = NewDisp;
14371435
}
14381436

1437+
if (isSymbol) {
1438+
if (SM.getSym()) {
1439+
Error(Start, "cannot use more than one symbol in memory operand");
1440+
return nullptr;
1441+
}
1442+
if (SM.getBaseReg()) {
1443+
Error(Start, "cannot use base register with variable reference");
1444+
return nullptr;
1445+
}
1446+
if (SM.getIndexReg()) {
1447+
Error(Start, "cannot use index register with variable reference");
1448+
return nullptr;
1449+
}
1450+
}
1451+
14391452
int BaseReg = SM.getBaseReg();
14401453
int IndexReg = SM.getIndexReg();
14411454
int Scale = SM.getScale();
@@ -1541,7 +1554,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
15411554
}
15421555

15431556
if (getLexer().is(AsmToken::LBrac))
1544-
return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1557+
return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
15451558

15461559
const MCExpr *Val;
15471560
SMLoc End;
@@ -1598,66 +1611,6 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
15981611
}
15991612
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
16001613
}
1601-
/// ParseIntelMemOperand - Parse intel style memory operand.
1602-
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1603-
SMLoc Start,
1604-
unsigned Size) {
1605-
MCAsmParser &Parser = getParser();
1606-
const AsmToken &Tok = Parser.getTok();
1607-
SMLoc End;
1608-
1609-
// Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1610-
if (getLexer().is(AsmToken::LBrac))
1611-
return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1612-
assert(ImmDisp == 0);
1613-
1614-
const MCExpr *Val;
1615-
if (!isParsingInlineAsm()) {
1616-
if (getParser().parsePrimaryExpr(Val, End))
1617-
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1618-
1619-
return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1620-
}
1621-
1622-
InlineAsmIdentifierInfo Info;
1623-
StringRef Identifier = Tok.getString();
1624-
if (ParseIntelIdentifier(Val, Identifier, Info,
1625-
/*Unevaluated=*/false, End))
1626-
return nullptr;
1627-
1628-
if (!getLexer().is(AsmToken::LBrac))
1629-
return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1630-
/*Scale=*/1, Start, End, Size, Identifier, Info);
1631-
1632-
Parser.Lex(); // Eat '['
1633-
1634-
// Parse Identifier [ ImmDisp ]
1635-
IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1636-
/*AddImmPrefix=*/false);
1637-
if (ParseIntelExpression(SM, End))
1638-
return nullptr;
1639-
1640-
if (SM.getSym()) {
1641-
Error(Start, "cannot use more than one symbol in memory operand");
1642-
return nullptr;
1643-
}
1644-
if (SM.getBaseReg()) {
1645-
Error(Start, "cannot use base register with variable reference");
1646-
return nullptr;
1647-
}
1648-
if (SM.getIndexReg()) {
1649-
Error(Start, "cannot use index register with variable reference");
1650-
return nullptr;
1651-
}
1652-
1653-
const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1654-
// BaseReg is non-zero to avoid assertions. In the context of inline asm,
1655-
// we're pointing to a local variable in memory, so the base register is
1656-
// really the frame or stack pointer.
1657-
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1658-
/*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1659-
Start, End, Size, Identifier, Info.OpDecl);
1660-
}
16611614

16621615
/// Parse the '.' operator.
16631616
bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
@@ -1804,49 +1757,8 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
18041757
Parser.Lex(); // Eat ptr.
18051758
PtrInOperand = true;
18061759
}
1807-
Start = Tok.getLoc();
1808-
1809-
// Immediate.
1810-
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1811-
getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1812-
AsmToken StartTok = Tok;
1813-
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1814-
/*AddImmPrefix=*/false);
1815-
if (ParseIntelExpression(SM, End))
1816-
return nullptr;
1817-
1818-
int64_t Imm = SM.getImm();
1819-
if (isParsingInlineAsm()) {
1820-
unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1821-
if (StartTok.getString().size() == Len)
1822-
// Just add a prefix if this wasn't a complex immediate expression.
1823-
InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1824-
else
1825-
// Otherwise, rewrite the complex expression as a single immediate.
1826-
InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1827-
}
1828-
1829-
if (getLexer().isNot(AsmToken::LBrac)) {
1830-
// If a directional label (ie. 1f or 2b) was parsed above from
1831-
// ParseIntelExpression() then SM.getSym() was set to a pointer to
1832-
// the MCExpr with the directional local symbol and this is a
1833-
// memory operand not an immediate operand.
1834-
if (SM.getSym())
1835-
return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1836-
Size);
1837-
1838-
const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1839-
return X86Operand::CreateImm(ImmExpr, Start, End);
1840-
}
1841-
1842-
// Only positive immediates are valid.
1843-
if (Imm < 0)
1844-
return ErrorOperand(Start, "expected a positive immediate displacement "
1845-
"before bracketed expr.");
18461760

1847-
// Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1848-
return ParseIntelMemOperand(Imm, Start, Size);
1849-
}
1761+
Start = Tok.getLoc();
18501762

18511763
// rounding mode token
18521764
if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
@@ -1855,7 +1767,8 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
18551767

18561768
// Register.
18571769
unsigned RegNo = 0;
1858-
if (!ParseRegister(RegNo, Start, End)) {
1770+
if (getLexer().is(AsmToken::Identifier) &&
1771+
!ParseRegister(RegNo, Start, End)) {
18591772
// If this is a segment register followed by a ':', then this is the start
18601773
// of a segment override, otherwise this is a normal register reference.
18611774
// In case it is a normal register and there is ptr in the operand this
@@ -1867,12 +1780,63 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
18671780
}
18681781
return X86Operand::CreateReg(RegNo, Start, End);
18691782
}
1870-
18711783
return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
18721784
}
18731785

1874-
// Memory operand.
1875-
return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1786+
// Immediates and Memory
1787+
1788+
// Parse [ BaseReg + Scale*IndexReg + Disp ].
1789+
if (getLexer().is(AsmToken::LBrac))
1790+
return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
1791+
Size);
1792+
1793+
AsmToken StartTok = Tok;
1794+
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1795+
/*AddImmPrefix=*/false);
1796+
if (ParseIntelExpression(SM, End))
1797+
return nullptr;
1798+
1799+
bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
1800+
int64_t Imm = SM.getImm();
1801+
if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
1802+
SM.getSym()->evaluateAsAbsolute(Imm);
1803+
1804+
if (StartTok.isNot(AsmToken::Identifier) &&
1805+
StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
1806+
unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1807+
if (StartTok.getString().size() == Len)
1808+
// Just add a prefix if this wasn't a complex immediate expression.
1809+
InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1810+
else
1811+
// Otherwise, rewrite the complex expression as a single immediate.
1812+
InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1813+
}
1814+
1815+
if (getLexer().isNot(AsmToken::LBrac)) {
1816+
// If a directional label (ie. 1f or 2b) was parsed above from
1817+
// ParseIntelExpression() then SM.getSym() was set to a pointer to
1818+
// the MCExpr with the directional local symbol and this is a
1819+
// memory operand not an immediate operand.
1820+
if (isSymbol) {
1821+
if (isParsingInlineAsm())
1822+
return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
1823+
/*IndexReg=*/0,
1824+
/*Scale=*/1, Start, End, Size,
1825+
SM.getSymName(), SM.getIdentifierInfo());
1826+
return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1827+
Size);
1828+
}
1829+
1830+
const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1831+
return X86Operand::CreateImm(ImmExpr, Start, End);
1832+
}
1833+
1834+
// Only positive immediates are valid.
1835+
if (Imm < 0)
1836+
return ErrorOperand(Start, "expected a positive immediate displacement "
1837+
"before bracketed expr.");
1838+
1839+
return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
18761840
}
18771841

18781842
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
@@ -1916,7 +1880,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
19161880
SMLoc Start = Parser.getTok().getLoc(), End;
19171881
if (getSTI().getFeatureBits()[X86::FeatureAVX512])
19181882
return ParseRoundingModeOp(Start, End);
1919-
return ErrorOperand(Start, "unknown token in expression");
1883+
return ErrorOperand(Start, "Unexpected '{' in expression");
19201884
}
19211885
}
19221886
}

‎llvm/test/MC/X86/intel-syntax-encoding.s

+5
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,8 @@ LBB0_3:
7676
// CHECK: encoding: [0xca,0x08,0x00]
7777
retf 8
7878

79+
.set FOO, 2
80+
cmp eax, FOO
81+
// CHECK: encoding: [0x83,0xf8,0x02]
82+
cmp eax, FOO[eax]
83+
// CHECK: encoding: [0x67,0x3b,0x40,0x02]

‎llvm/test/MC/X86/intel-syntax-error.s

+13
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,16 @@ _test2:
1111
.att_syntax noprefix
1212
// CHECK: error: '.att_syntax noprefix' is not supported: registers must have a '%' prefix in .att_syntax
1313
movl $257, -4(esp)
14+
15+
16+
.intel_syntax noprefix
17+
18+
.global arr
19+
.global i
20+
.set FOO, 2
21+
//CHECK-STDERR: error: cannot use base register with variable reference
22+
mov eax, DWORD PTR arr[ebp + 1 + (2 * 5) - 3 + 1<<1]
23+
//CHECK-STDERR: error: cannot use index register with variable reference
24+
mov eax, DWORD PTR arr[esi*4]
25+
//CHECK-STDERR: error: cannot use more than one symbol in memory operand
26+
mov eax, DWORD PTR arr[i]

0 commit comments

Comments
 (0)
Please sign in to comment.