Index: include/llvm/MC/MCParser/MCAsmParser.h =================================================================== --- include/llvm/MC/MCParser/MCAsmParser.h +++ include/llvm/MC/MCParser/MCAsmParser.h @@ -38,11 +38,14 @@ public: void *OpDecl; bool IsVarDecl; + // Answer whether the identifier in question can be seen as a global lvalue + bool IsGlobalLV; unsigned Length, Size, Type; void clear() { OpDecl = nullptr; IsVarDecl = false; + IsGlobalLV = false; Length = 1; Size = 0; Type = 0; Index: include/llvm/MC/MCParser/MCTargetAsmParser.h =================================================================== --- include/llvm/MC/MCParser/MCTargetAsmParser.h +++ include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -42,6 +42,9 @@ AOK_Output, // Rewrite in terms of $N. AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). AOK_Label, // Rewrite local labels. + AOK_IntelExpr, // Rewrite a compound expression to a canonical form + // which is an Intel expression, roughly as: + // [ Base + Index * Scale + ImmDisp ] AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). AOK_Skip // Skip emission (e.g., offset/type operators). 
}; @@ -58,22 +61,39 @@ 3, // AOK_Output 5, // AOK_SizeDirective 1, // AOK_Label + 1, // AOK_IntelExpr 5, // AOK_EndOfStatement 2 // AOK_Skip }; +struct IntelExprRewrite { + StringRef Base; + StringRef Index; + unsigned Scale; + unsigned ImmDisp; + + IntelExprRewrite() : Scale(1), ImmDisp(0) {} + IntelExprRewrite(StringRef base, StringRef index, unsigned scale, + unsigned immDisp) : Base(base), Index(index), Scale(scale), + ImmDisp(immDisp) {}; +}; + struct AsmRewrite { AsmRewriteKind Kind; SMLoc Loc; unsigned Len; unsigned Val; StringRef Label; + IntelExprRewrite IntelExpr; public: AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, unsigned val = 0) : Kind(kind), Loc(loc), Len(len), Val(val) {} AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) : Kind(kind), Loc(loc), Len(len), Val(0), Label(label) {} + // IntelExpr + AsmRewrite(SMLoc loc, unsigned len, IntelExprRewrite intelExpr) + : Kind(AOK_IntelExpr), Loc(loc), Len(len), IntelExpr(intelExpr) {} }; struct ParseInstructionInfo { Index: lib/MC/MCParser/AsmParser.cpp =================================================================== --- lib/MC/MCParser/AsmParser.cpp +++ lib/MC/MCParser/AsmParser.cpp @@ -5474,6 +5474,21 @@ case AOK_Label: OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label; break; + case AOK_IntelExpr: + OS << "["; + if (!AR.IntelExpr.Base.empty()) + OS << AR.IntelExpr.Base; + if (!AR.IntelExpr.Index.empty()) { + if (!AR.IntelExpr.Base.empty()) + OS << " + "; + OS << AR.IntelExpr.Index; + if (AR.IntelExpr.Scale > 1) + OS << " * $$" << AR.IntelExpr.Scale; + } + if (AR.IntelExpr.ImmDisp) + OS << " + $$" << AR.IntelExpr.ImmDisp; + OS << "]"; + break; case AOK_Input: OS << '$' << InputIdx++; break; Index: lib/Target/X86/AsmParser/X86AsmParser.cpp =================================================================== --- lib/Target/X86/AsmParser/X86AsmParser.cpp +++ lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -11,6 +11,7 @@ #include "X86AsmInstrumentation.h" 
#include "X86AsmParserCommon.h" #include "X86Operand.h" +#include "InstPrinter/X86IntelInstPrinter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -757,7 +758,8 @@ bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); std::unique_ptr ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp, - bool isSymbol, unsigned Size); + unsigned Size, + IntelExprStateMachine *SymbolSM = nullptr); bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info, bool IsUnevaluatedOperand, SMLoc &End); @@ -1248,75 +1250,83 @@ if (IsSymRef && !Size && Info.Type) FrontendSize = Info.Type * 8; // Size is in terms of bits in this context. - // When parsing inline assembly we set the base register to a non-zero value - // if we don't know the actual value at this time. This is necessary to + // It is very common for an MS-style memory reference to use a global + // variable and one or two registers, in which case we can't address the + // variable via RIP/EIP. + // Otherwise, we set the base register to a non-zero value + // if we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. - BaseReg = BaseReg ? BaseReg : 1; - return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, - IndexReg, Scale, Start, End, Size, Identifier, - Info.OpDecl, FrontendSize); + if ((BaseReg || IndexReg) && Info.IsGlobalLV) { + return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End); + } else { + BaseReg = BaseReg ? 
BaseReg : 1; + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, + IndexReg, Scale, Start, End, Size, Identifier, + Info.OpDecl, FrontendSize); + } } -static void +static bool RewriteIntelBracExpression(SmallVectorImpl &AsmRewrites, - StringRef SymName, int64_t ImmDisp, - int64_t FinalImmDisp, SMLoc &BracLoc, - SMLoc &StartInBrac, SMLoc &End) { - // Remove the '[' and ']' from the IR string. - AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1); - AsmRewrites.emplace_back(AOK_Skip, End, 1); - - // If ImmDisp is non-zero, then we parsed a displacement before the - // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) - // If ImmDisp doesn't match the displacement computed by the state machine - // then we have an additional displacement in the bracketed expression. - if (ImmDisp != FinalImmDisp) { - if (ImmDisp) { - // We have an immediate displacement before the bracketed expression. - // Adjust this to match the final immediate displacement. - bool Found = false; - for (AsmRewrite &AR : AsmRewrites) { - if (AR.Loc.getPointer() > BracLoc.getPointer()) - continue; - if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) { - assert (!Found && "ImmDisp already rewritten."); + StringRef SymName, int64_t ImmDisp, unsigned BaseReg, + unsigned IndexReg, unsigned Scale, SMLoc Start, + SMLoc &BracLoc, SMLoc &End, bool IsGlobalLV) { + if (BaseReg && IndexReg && !IsGlobalLV) + return true; + + // Remove any associated immediate rewrites entered by the State Machine, + // as we'll emit the accumulated immediate anyway. 
+ bool IsRegExpr = BaseReg || IndexReg; + bool Found = false; + for (AsmRewrite &AR : AsmRewrites) + if (AR.Kind == AOK_Imm || AR.Kind == AOK_ImmPrefix) { + if ((AR.Loc.getPointer() < BracLoc.getPointer()) && + (Start.getPointer() <= AR.Loc.getPointer())) { + assert(!Found && "ImmDisp already rewritten"); + AR.Len = BracLoc.getPointer() - AR.Loc.getPointer(); + Found = true; + // Either skip or modify an AOK_Imm rewrite which is positioned + // before a bracketed expression, depending whether it is a compound + // expression (registers + variable), or a simple one (variable) + if (IsRegExpr || !ImmDisp) + AR.Kind = AOK_Skip; + else if (ImmDisp) { AR.Kind = AOK_Imm; - AR.Len = BracLoc.getPointer() - AR.Loc.getPointer(); - AR.Val = FinalImmDisp; - Found = true; - break; + AR.Val = ImmDisp; } - } - assert (Found && "Unable to rewrite ImmDisp."); - (void)Found; - } else { - // We have a symbolic and an immediate displacement, but no displacement - // before the bracketed expression. Put the immediate displacement - // before the bracketed expression. - AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp); - } - } - // Remove all the ImmPrefix rewrites within the brackets. - // We may have some Imm rewrties as a result of an operator applying, - // remove them as well - for (AsmRewrite &AR : AsmRewrites) { - if (AR.Loc.getPointer() < StartInBrac.getPointer()) - continue; - if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) - AR.Kind = AOK_Delete; + // Delete all AOK_ImmPrefix rewrites from within a bracketed expression + } else if (AR.Loc.getPointer() > BracLoc.getPointer()) + AR.Kind = AOK_Delete; + } + // [var + ImmDisp] + if (!Found && !IsRegExpr && ImmDisp) { + SMLoc Loc = SymName.data() < BracLoc.getPointer() ? + SMLoc::getFromPointer(SymName.data()) : BracLoc; + AsmRewrites.emplace_back(AOK_Imm, Loc, 0, ImmDisp); } + const char *SymLocPtr = SymName.data(); - // Skip everything before the symbol. 
- if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { - assert(Len > 0 && "Expected a non-negative length."); - AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len); - } - // Skip everything after the symbol. - if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { - SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); - assert(Len > 0 && "Expected a non-negative length."); + int Len = SymLocPtr - BracLoc.getPointer(); + + if (Len > 0) + // Symbol is within the brackets, skip anything between it and the left brac + AsmRewrites.emplace_back(AOK_Skip, BracLoc, Len); + + // Either skip everything after the symbol, + // or rewrite it to an AOK_IntelExpr + Len = End.getPointer() - (SymLocPtr + SymName.size()) + 1; + SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); + if (!IsRegExpr) { AsmRewrites.emplace_back(AOK_Skip, Loc, Len); + } else { + StringRef Base = BaseReg ? + X86IntelInstPrinter::getRegisterName(BaseReg) : ""; + StringRef Index = IndexReg ? + X86IntelInstPrinter::getRegisterName(IndexReg) : ""; + IntelExprRewrite IntelExpr(Base, Index, Scale, ImmDisp); + AsmRewrites.emplace_back(Loc, Len, IntelExpr); } + return false; } // Some binary bitwise operators have a named synonymous @@ -1477,8 +1487,8 @@ std::unique_ptr X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, - int64_t ImmDisp, bool isSymbol, - unsigned Size) { + int64_t ImmDisp, unsigned Size, + IntelExprStateMachine *SymbolSM) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); @@ -1487,29 +1497,40 @@ Parser.Lex(); // Eat '[' SMLoc StartInBrac = Parser.getTok().getLoc(); - // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We + // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We // may have already parsed an immediate displacement before the bracketed // expression. 
IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); if (ParseIntelExpression(SM, End)) return nullptr; - const MCExpr *Disp = nullptr; - if (const MCExpr *Sym = SM.getSym()) { - // A symbolic displacement. - Disp = Sym; - if (isParsingInlineAsm()) - RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(), - ImmDisp, SM.getImm(), BracLoc, StartInBrac, - End); + bool IsSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant; + if (SymbolSM && IsSymbol) { + Error(Start, "cannot use more than one symbol in memory operand"); + return nullptr; } - if (SM.getImm() || !Disp) { - const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext()); + int64_t Imm = 0; + if (SM.getSym() && !IsSymbol) + SM.getSym()->evaluateAsAbsolute(Imm); + Imm += SM.getImm(); + + IntelExprStateMachine *SymSM = IsSymbol ? &SM : SymbolSM; + const MCExpr *Disp = SymSM ? SymSM->getSym() : nullptr; + if (Disp && isParsingInlineAsm()) + if (RewriteIntelBracExpression(*InstInfo->AsmRewrites, SymSM->getSymName(), + Imm, SM.getBaseReg(), SM.getIndexReg(), + SM.getScale(), Start, BracLoc, End, + SymSM->getIdentifierInfo().IsGlobalLV)) + return ErrorOperand(BracLoc, "Can't use a local variable with both " + "base and index registers"); + + if (Imm || !Disp) { + const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext()); if (Disp) - Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext()); + Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext()); else - Disp = Imm; // An immediate displacement only. + Disp = ImmDisp; // An immediate displacement only. } // Parse struct field access. Intel requires a dot, but MSVC doesn't. 
MSVC @@ -1528,20 +1549,6 @@ Disp = NewDisp; } - if (isSymbol) { - if (SM.getSym()) { - Error(Start, "cannot use more than one symbol in memory operand"); - return nullptr; - } - if (SM.getBaseReg()) { - Error(Start, "cannot use base register with variable reference"); - return nullptr; - } - if (SM.getIndexReg()) { - Error(Start, "cannot use index register with variable reference"); - return nullptr; - } - } int BaseReg = SM.getBaseReg(); int IndexReg = SM.getIndexReg(); @@ -1563,10 +1570,15 @@ IndexReg, Scale, Start, End, Size); } - InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + if (SymSM) + return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, SymSM->getSymName(), + SymSM->getIdentifierInfo()); + + InlineAsmIdentifierInfo Info; + Info.clear(); return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, - End, Size, SM.getSymName(), Info, - isParsingInlineAsm()); + End, Size, StringRef(), Info); } // Inline assembly may use variable names with namespace alias qualifiers. @@ -1649,7 +1661,7 @@ } if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); const MCExpr *Val; SMLoc End; @@ -1884,8 +1896,7 @@ // Parse [ BaseReg + Scale*IndexReg + Disp ]. if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false, - Size); + return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, Size); AsmToken StartTok = Tok; IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, @@ -1929,11 +1940,12 @@ } // Only positive immediates are valid. - if (Imm < 0) + if (Imm < 0 ) return ErrorOperand(Start, "expected a positive immediate displacement " "before bracketed expr."); - return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size); + IntelExprStateMachine *SymbolSM = isSymbol ? 
&SM : nullptr; + return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, Size, SymbolSM); } std::unique_ptr X86AsmParser::ParseATTOperand() { Index: test/MC/X86/intel-syntax-error.s =================================================================== --- test/MC/X86/intel-syntax-error.s +++ test/MC/X86/intel-syntax-error.s @@ -18,10 +18,6 @@ .global arr .global i .set FOO, 2 -//CHECK: error: cannot use base register with variable reference -mov eax, DWORD PTR arr[ebp + 1 + (2 * 5) - 3 + 1<<1] -//CHECK: error: cannot use index register with variable reference -mov eax, DWORD PTR arr[esi*4] //CHECK: error: cannot use more than one symbol in memory operand mov eax, DWORD PTR arr[i] //CHECK: error: rip can only be used as a base register