Index: include/llvm/MC/MCParser/MCTargetAsmParser.h =================================================================== --- include/llvm/MC/MCParser/MCTargetAsmParser.h +++ include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -62,30 +62,16 @@ int64_t Imm; StringRef BaseReg; StringRef IndexReg; + StringRef OffsetName; unsigned Scale; - IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0), - BaseReg(StringRef()), IndexReg(StringRef()), - Scale(1) {} - // Compund immediate expression - IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) { - Imm = imm; - } - // [Reg + ImmediateExpression] - // We don't bother to emit an immediate expression evaluated to zero - IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0, - bool needBracs = true) : - IntelExpr(imm, needBracs) { - IndexReg = reg; - if (scale) - Scale = scale; - } - // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression] - IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0, - int64_t imm = 0, bool needBracs = true) : - IntelExpr(indexReg, imm, scale, needBracs) { - BaseReg = baseReg; - } + IntelExpr() : NeedBracs(false), Imm(0), BaseReg(StringRef()), + IndexReg(StringRef()), OffsetName(StringRef()), Scale(1) {} + // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] + IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, + StringRef offsetName, int64_t imm, bool needBracs) : + NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), + OffsetName(offsetName), Scale(scale) {} bool hasBaseReg() const { return BaseReg.size(); } @@ -95,6 +81,14 @@ bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } + bool hasOffset() const { + return OffsetName.size(); + } + // Normally we won't emit immediates unconditionally, + // unless we've got no other components + bool emitImm() const { + return !(hasRegs() || hasOffset()); + } bool isValid() const { return (Scale == 1) || (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); Index: lib/MC/MCParser/AsmParser.cpp =================================================================== --- lib/MC/MCParser/AsmParser.cpp +++ lib/MC/MCParser/AsmParser.cpp @@ -5640,9 +5640,12 @@ OS << (AR.IntelExp.hasBaseReg() ? " + " : "") << AR.IntelExp.IndexReg; if (AR.IntelExp.Scale > 1) - OS << " * $$" << AR.IntelExp.Scale; - if (AR.IntelExp.Imm || !AR.IntelExp.hasRegs()) - OS << (AR.IntelExp.hasRegs() ? " + $$" : "$$") << AR.IntelExp.Imm; + OS << " * $$" << AR.IntelExp.Scale; + if (AR.IntelExp.hasOffset()) + OS << (AR.IntelExp.hasRegs() ? " + offset " : "offset ") + << AR.IntelExp.OffsetName; + if (AR.IntelExp.Imm || AR.IntelExp.emitImm()) + OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm; if (AR.IntelExp.NeedBracs) OS << "]"; break; Index: lib/Target/X86/AsmParser/X86AsmParser.cpp =================================================================== --- lib/Target/X86/AsmParser/X86AsmParser.cpp +++ lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -118,7 +118,6 @@ IOK_LENGTH, IOK_SIZE, IOK_TYPE, - IOK_OFFSET }; class InfixCalculator { @@ -310,6 +309,7 @@ IES_RSHIFT, IES_PLUS, IES_MINUS, + IES_OFFSET, IES_NOT, IES_MULTIPLY, IES_DIVIDE, @@ -334,16 +334,28 @@ InlineAsmIdentifierInfo Info; short BracCount; bool MemExpr; + bool OffsetOperator; + + bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) { + if (Sym) { + ErrMsg = "cannot use more than one symbol in memory operand"; + return true; + } + Sym = Val; + SymName = ID; + return false; + } public: IntelExprStateMachine() : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Imm(0), Sym(nullptr), BracCount(0), - MemExpr(false) {} + MemExpr(false), OffsetOperator(false) {} void addImm(int64_t imm) { Imm += imm; } short getBracCount() { return BracCount; } bool isMemExpr() { return MemExpr; } + bool isOffsetOperator() { return OffsetOperator; } unsigned getBaseReg() { return BaseReg; } unsigned getIndexReg() { return IndexReg; } unsigned getScale() { return Scale; } @@ -440,6 +452,7 @@ case IES_INTEGER: case IES_RPAREN: case IES_REGISTER: + case IES_OFFSET: State = IES_PLUS; IC.pushOperator(IC_PLUS); if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { @@ -484,10 +497,12 @@ case IES_INTEGER: case IES_REGISTER: case IES_INIT: + case IES_OFFSET: State = IES_MINUS; // push minus operator if it is not a negate operator if (CurrState == IES_REGISTER || CurrState == IES_RPAREN || - CurrState == IES_INTEGER || CurrState == IES_RBRAC) + CurrState == IES_INTEGER || CurrState == IES_RBRAC || + CurrState == IES_OFFSET) IC.pushOperator(IC_MINUS); else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { // We have negate operator for Scale: it's illegal @@ -540,7 +555,6 @@ } PrevState = CurrState; } - bool onRegister(unsigned Reg, StringRef &ErrMsg) { IntelExprState CurrState = State; switch (State) { @@ -588,7 +602,6 @@ if (auto *CE = dyn_cast(SymRef)) return onInteger(CE->getValue(), ErrMsg); PrevState = State; - bool HasSymbol = Sym != nullptr; switch (State) { default: State = IES_ERROR; @@ -598,18 +611,16 @@ case IES_NOT: case IES_INIT: case IES_LBRAC: + if (setSymRef(SymRef, SymRefName, ErrMsg)) + return true; MemExpr = true; State = IES_INTEGER; - Sym = SymRef; - SymName = SymRefName; IC.pushOperand(IC_IMM); if (ParsingInlineAsm) Info = IDInfo; break; } - if (HasSymbol) - ErrMsg = "cannot use more than one symbol in memory operand"; - return HasSymbol; + return false; } bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { IntelExprState CurrState = State; @@ -783,6 +794,26 @@ break; } } + bool onOffset(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) { + PrevState = State; + switch (State) { + default: + ErrMsg = "unexpected offset operator expression"; + return true; + case IES_PLUS: + case IES_INIT: + case IES_LBRAC: + if (setSymRef(Val, ID, ErrMsg)) + return true; + OffsetOperator = true; + State = IES_OFFSET; + // As we cannot yet resolve the actual value (offset), we retain + // the requested semantics by pushing a '0' to the operands stack + IC.pushOperand(IC_IMM); + break; + } + return false; + } }; bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, @@ -814,18 +845,20 @@ std::unique_ptr ParseOperand(); std::unique_ptr ParseATTOperand(); std::unique_ptr ParseIntelOperand(); - std::unique_ptr ParseIntelOffsetOfOperator(); + bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID, SMLoc &End); bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); unsigned IdentifyIntelInlineAsmOperator(StringRef Name); unsigned ParseIntelInlineAsmOperator(unsigned OpKind); std::unique_ptr ParseRoundingModeOp(SMLoc Start, SMLoc End); - bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM); + bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM, + bool &ParseError, SMLoc &End); void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start, SMLoc End); bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info, - bool IsUnevaluatedOperand, SMLoc &End); + bool IsUnevaluatedOperand, SMLoc &End, + bool IsParsingOffsetOperator = false); std::unique_ptr ParseMemOperand(unsigned SegReg, SMLoc StartLoc); @@ -1311,26 +1344,39 @@ // Some binary bitwise operators have a named synonymous // Query a candidate string for being such a named operator // and if so - invoke the appropriate handler -bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) { +bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, + IntelExprStateMachine &SM, + bool &ParseError, SMLoc &End) { // A named operator should be either lower or upper case, but not a mix if (Name.compare(Name.lower()) && Name.compare(Name.upper())) return false; - if (Name.equals_lower("not")) + if (Name.equals_lower("not")) { SM.onNot(); - else if (Name.equals_lower("or")) + } else if (Name.equals_lower("or")) { SM.onOr(); - else if (Name.equals_lower("shl")) + } else if (Name.equals_lower("shl")) { SM.onLShift(); - else if (Name.equals_lower("shr")) + } else if (Name.equals_lower("shr")) { SM.onRShift(); - else if (Name.equals_lower("xor")) + } else if (Name.equals_lower("xor")) { SM.onXor(); - else if (Name.equals_lower("and")) + } else if (Name.equals_lower("and")) { SM.onAnd(); - else if (Name.equals_lower("mod")) + } else if (Name.equals_lower("mod")) { SM.onMod(); - else + } else if (Name.equals_lower("offset")) { + const MCExpr *Val = nullptr; + StringRef ID; + if ((ParseError = ParseIntelOffsetOperator(Val, ID, End))) + return true; + StringRef ErrMsg; + if ((ParseError = SM.onOffset(Val, ID, ErrMsg))) + return Error(SMLoc::getFromPointer(Name.data()), ErrMsg); + } else { return false; + } + if (!Name.equals_lower("offset")) + End = consumeToken(); return true; } @@ -1372,8 +1418,12 @@ break; } // Operator synonymous ("not", "or" etc.) - if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM))) + bool ParseError = false; + if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) { + if (ParseError) + return true; break; + } // Symbol reference, when parsing assembly content InlineAsmIdentifierInfo Info; const MCExpr *Val; @@ -1387,9 +1437,6 @@ } // MS InlineAsm operators (TYPE/LENGTH/SIZE) if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { - if (OpKind == IOK_OFFSET) - return Error(IdentLoc, "Dealing OFFSET operator as part of" - "a compound immediate expression is yet to be supported"); if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { if (SM.onInteger(Val, ErrMsg)) return Error(IdentLoc, ErrMsg); @@ -1487,9 +1534,9 @@ SMLoc Loc = Start; unsigned ExprLen = End.getPointer() - Start.getPointer(); // Skip everything before a symbol displacement (if we have one) - if (SM.getSym()) { + if (SM.getSym() && !SM.isOffsetOperator()) { StringRef SymName = SM.getSymName(); - if (unsigned Len = SymName.data() - Start.getPointer()) + if (unsigned Len = SymName.data() - Start.getPointer()) InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len); Loc = SMLoc::getFromPointer(SymName.data() + SymName.size()); ExprLen = End.getPointer() - (SymName.data() + SymName.size()); @@ -1504,21 +1551,27 @@ // Build an Intel Expression rewrite StringRef BaseRegStr; StringRef IndexRegStr; + StringRef OffsetNameStr; if (SM.getBaseReg()) BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg()); if (SM.getIndexReg()) IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg()); + if (SM.isOffsetOperator()) + OffsetNameStr = SM.getSymName(); // Emit it - IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr()); + IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr, + SM.getImm(), SM.isMemExpr()); InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr); } // Inline assembly may use variable names with namespace alias qualifiers. -bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val, - StringRef &Identifier, - InlineAsmIdentifierInfo &Info, - bool IsUnevaluatedOperand, - SMLoc &End) { +bool +X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val, + StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, + SMLoc &End, + bool IsParsingOffsetOperator) { MCAsmParser &Parser = getParser(); assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); Val = nullptr; @@ -1551,9 +1604,13 @@ SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), Loc, false); assert(InternalName.size() && "We should have an internal name here."); - // Push a rewrite for replacing the identifier name with the internal name. - InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), - InternalName); + // Push a rewrite for replacing the identifier name with the internal name, + // unless we are parsing the operand of an offset operator + if (!IsParsingOffsetOperator) + InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), + InternalName); + else + Identifier = InternalName; } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) return false; // Create the symbol reference. @@ -1633,39 +1690,28 @@ return false; } -/// Parse the 'offset' operator. This operator is used to specify the -/// location rather then the content of a variable. -std::unique_ptr X86AsmParser::ParseIntelOffsetOfOperator() { - MCAsmParser &Parser = getParser(); - const AsmToken &Tok = Parser.getTok(); - SMLoc OffsetOfLoc = Tok.getLoc(); - Parser.Lex(); // Eat offset. - - const MCExpr *Val; +/// Parse the 'offset' operator. +/// This operator is used to specify the location of a given operand +bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID, + SMLoc &End) { + // Eat offset, mark start of identifier. + SMLoc Start = Lex().getLoc(); + ID = getTok().getString(); InlineAsmIdentifierInfo Info; - SMLoc Start = Tok.getLoc(), End; - StringRef Identifier = Tok.getString(); - if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, - /*Unevaluated=*/false, End)) - return nullptr; - - void *Decl = nullptr; - // FIXME: MS evaluates "offset " to the underlying integral - if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) - return ErrorOperand(Start, "offset operator cannot yet handle constants"); - else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) - Decl = Info.Var.Decl; - // Don't emit the offset operator. - InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); - - // The offset operator will have an 'r' constraint, thus we need to create - // register operand to ensure proper matching. Just pick a GPR based on - // the size of a pointer. - bool Parse32 = is32BitMode() || Code16GCC; - unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX); - - return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, - OffsetOfLoc, Identifier, Decl); + if (!isParsingInlineAsm()) { + if (getTok().isNot(AsmToken::Identifier) || + getParser().parsePrimaryExpr(Val, End)) + return Error(Start, "unexpected token!"); + } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) { + return Error(Start, "unable to lookup expression"); + } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) { + return Error(Start, "offset operator cannot yet handle constants"); + } else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var) && + !Info.Var.IsGlobalLV) { + return Error(SMLoc::getFromPointer(ID.data()), "illegal operand for " + "offset operator"); + } + return false; } // Query a candidate string for being an Intel assembly operator @@ -1675,7 +1721,6 @@ .Cases("TYPE","type",IOK_TYPE) .Cases("SIZE","size",IOK_SIZE) .Cases("LENGTH","length",IOK_LENGTH) - .Cases("OFFSET","offset",IOK_OFFSET) .Default(IOK_INVALID); } @@ -1746,13 +1791,6 @@ const AsmToken &Tok = Parser.getTok(); SMLoc Start, End; - // FIXME: Offset operator - // Should be handled as part of immediate expression, as other operators - // Currently, only supported as a stand-alone operand - if (isParsingInlineAsm()) - if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET) - return ParseIntelOffsetOfOperator(); - // Parse optional Size directive. unsigned Size; if (ParseIntelMemoryOperandSize(Size)) Index: test/CodeGen/X86/offset-operator.ll =================================================================== --- test/CodeGen/X86/offset-operator.ll +++ test/CodeGen/X86/offset-operator.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-asm-syntax=intel -relocation-model=static < %s | FileCheck %s + +; Test we are emitting the 'offset' operator upon an immediate reference of a label: +; The emitted 'att-equivalent' of this one is "movl $.L.str, %eax" + +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + +define i8* @test_offset_operator() { +; CHECK-LABEL: test_offset_operator: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: mov eax, offset .L.str +; CHECK-NEXT: ret +entry: + ret i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0) +} Index: test/MC/X86/pr32530.s =================================================================== --- test/MC/X86/pr32530.s +++ test/MC/X86/pr32530.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s + +.text +// CHECK: movq $msg, %rsi +// CHECK: movq $msg+314159, %rax +// CHECK: movq $msg-89793, msg-6535(%rax,%rbx,2) + mov rsi, offset msg + mov rax, offset msg + 314159 + mov qword ptr [rax + 2*rbx + offset msg - 6535], offset msg - 89793 +.data +msg: + .ascii "Hello, world!\n" +