# Review notes. These '#' lines sit before the first file header and outside
# every hunk, so they are not part of the change itself.
#
# What the patch does:
# * MILexer.h / MILexer.cpp: adds the token kinds `comma`, `equal` and
#   `NamedRegister`. `lexPercent` consumes '%' plus all following identifier
#   characters as one NamedRegister token. `symbolToken` maps ',' and '=' to
#   their token kinds (anything else maps to Error) and `lexSymbol` consumes
#   that single character.
# * MIParser.cpp: `parse()` accepts an optional register definition followed
#   by '=' before the instruction name, then parses comma-separated machine
#   operands until Eof. For a non-variadic instruction, each operand whose
#   index is >= MCID.getNumOperands() is marked implicit (a temporary
#   workaround, per the in-code comment). Register names are resolved through
#   `Names2Regs`, which is filled on first use with the lower-cased
#   TRI->getName(I) of every register index I below TRI->getNumRegs().
# * MIRPrinter.cpp: prints the leading explicit register defs followed by
#   " = ", then the opcode name, then the remaining operands separated by
#   ", ". A register prints as '%' plus its lower-cased target register name;
#   other operand kinds hit llvm_unreachable.
# * Tests: three negative X86 MIR tests check the new parser diagnostics and
#   one positive test checks the printed instructions.
#
# NOTE(review): the copy of this patch under review had lost its line breaks
# and the contents of every '<...>' template argument list. Line breaks and
# indentation were restored so that each hunk matches the line counts in its
# '@@' header. Template arguments were reconstructed from usage:
#   - `StringMap<unsigned>`: the maps store and return `unsigned` values.
#   - `SmallVector<MachineOperand, 8>`: element type from `push_back(MO)`;
#     the inline size 8 is an assumption -- TODO confirm.
#   - `std::function<void(StringRef::iterator Loc, const Twine &)>`: taken
#     from the `ErrorCallback(C.location(), Twine(...))` call; the first
#     parameter type and name are assumptions -- TODO confirm.
# The column alignment inside the new .mir files could not be recovered and
# uses single spaces. Confirm all of the above against the original review
# before applying.
Index: lib/CodeGen/MIRParser/MILexer.h
===================================================================
--- lib/CodeGen/MIRParser/MILexer.h
+++ lib/CodeGen/MIRParser/MILexer.h
@@ -29,8 +29,13 @@
     Eof,
     Error,
 
+    // Tokens with no info.
+    comma,
+    equal,
+
     // Identifier tokens
-    Identifier
+    Identifier,
+    NamedRegister
   };
 
 private:
@@ -44,6 +49,8 @@
 
   bool isError() const { return Kind == Error; }
 
+  bool isRegister() const { return Kind == NamedRegister; }
+
   bool is(TokenKind K) const { return Kind == K; }
 
   bool isNot(TokenKind K) const { return Kind != K; }
Index: lib/CodeGen/MIRParser/MILexer.cpp
===================================================================
--- lib/CodeGen/MIRParser/MILexer.cpp
+++ lib/CodeGen/MIRParser/MILexer.cpp
@@ -68,6 +68,33 @@
   return C;
 }
 
+static Cursor lexPercent(Cursor C, MIToken &Token) {
+  auto Range = C;
+  C.advance(); // Skip '%'
+  while (isIdentifierChar(C.peek()))
+    C.advance();
+  Token = MIToken(MIToken::NamedRegister, Range.upto(C));
+  return C;
+}
+
+static MIToken::TokenKind symbolToken(char C) {
+  switch (C) {
+  case ',':
+    return MIToken::comma;
+  case '=':
+    return MIToken::equal;
+  default:
+    return MIToken::Error;
+  }
+}
+
+static Cursor lexSymbol(Cursor C, MIToken::TokenKind Kind, MIToken &Token) {
+  auto Range = C;
+  C.advance();
+  Token = MIToken(Kind, Range.upto(C));
+  return C;
+}
+
 StringRef llvm::lexMIToken(
     StringRef Source, MIToken &Token,
     std::function<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
@@ -80,6 +107,11 @@
   auto Char = C.peek();
   if (isalpha(Char) || Char == '_')
     return lexIdentifier(C, Token).remaining();
+  if (Char == '%')
+    return lexPercent(C, Token).remaining();
+  MIToken::TokenKind Kind = symbolToken(Char);
+  if (Kind != MIToken::Error)
+    return lexSymbol(C, Kind, Token).remaining();
   Token = MIToken(MIToken::Error, C.remaining());
   ErrorCallback(C.location(),
                 Twine("unexpected character '") + Twine(Char) + "'");
Index: lib/CodeGen/MIRParser/MIParser.cpp
===================================================================
--- lib/CodeGen/MIRParser/MIParser.cpp
+++ lib/CodeGen/MIRParser/MIParser.cpp
@@ -34,6 +34,8 @@
   MIToken Token;
   /// Maps from instruction names to op codes.
   StringMap<unsigned> Names2InstrOpCodes;
+  /// Maps from register names to registers.
+  StringMap<unsigned> Names2Regs;
 
 public:
   MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
@@ -53,6 +55,10 @@
 
   MachineInstr *parse();
 
+  bool parseRegister(unsigned &Reg);
+  bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
+  bool parseMachineOperand(MachineOperand &Dest);
+
 private:
   void initNames2InstrOpCodes();
 
@@ -61,6 +67,12 @@
   bool parseInstrName(StringRef InstrName, unsigned &OpCode);
 
   bool parseInstruction(unsigned &OpCode);
+
+  void initNames2Regs();
+
+  /// Try to convert a register name to a register number. Return true if the
+  /// register name is invalid.
+  bool getRegisterByName(StringRef RegName, unsigned &Reg);
 };
 
 } // end anonymous namespace
@@ -92,13 +104,60 @@
 MachineInstr *MIParser::parse() {
   lex();
 
+  // Parse any register operands before '='
+  // TODO: Allow parsing of multiple operands before '='
+  MachineOperand MO = MachineOperand::CreateImm(0);
+  SmallVector<MachineOperand, 8> Operands;
+  if (Token.isRegister()) {
+    if (parseRegisterOperand(MO, /*IsDef=*/true))
+      return nullptr;
+    Operands.push_back(MO);
+    if (Token.isNot(MIToken::equal)) {
+      error("expected '='");
+      return nullptr;
+    }
+    lex();
+  }
+
   unsigned OpCode;
   if (Token.isError() || parseInstruction(OpCode))
     return nullptr;
 
-  // TODO: Parse the rest of instruction - machine operands, etc.
+  // TODO: Parse the instruction flags and memory operands.
+
+  // Parse the remaining machine operands.
+  while (Token.isNot(MIToken::Eof)) {
+    if (parseMachineOperand(MO))
+      return nullptr;
+    Operands.push_back(MO);
+    if (Token.is(MIToken::Eof))
+      break;
+    if (Token.isNot(MIToken::comma)) {
+      error("expected ',' before the next machine operand");
+      return nullptr;
+    }
+    lex();
+  }
+
   const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
-  auto *MI = MF.CreateMachineInstr(MCID, DebugLoc());
+
+  // Verify machine operands.
+  if (!MCID.isVariadic()) {
+    for (size_t I = 0, E = Operands.size(); I < E; ++I) {
+      if (I < MCID.getNumOperands())
+        continue;
+      // Mark this register as implicit to prevent an assertion when it's added
+      // to an instruction. This is a temporary workaround until the implicit
+      // register flag can be parsed.
+      Operands[I].setImplicit();
+    }
+  }
+
+  // TODO: Determine the implicit behaviour when implicit register flags are
+  // parsed.
+  auto *MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
+  for (const auto &Operand : Operands)
+    MI->addOperand(MF, Operand);
   return MI;
 }
 
@@ -108,6 +167,46 @@
   StringRef InstrName = Token.stringValue();
   if (parseInstrName(InstrName, OpCode))
     return error(Twine("unknown machine instruction name '") + InstrName + "'");
+  lex();
+  return false;
+}
+
+bool MIParser::parseRegister(unsigned &Reg) {
+  switch (Token.kind()) {
+  case MIToken::NamedRegister: {
+    StringRef Name = Token.stringValue().drop_front(1); // Drop the '%'
+    if (getRegisterByName(Name, Reg))
+      return error(Twine("unknown register name '") + Name + "'");
+    break;
+  }
+  // TODO: Parse other register kinds.
+  default:
+    llvm_unreachable("The current token should be a register");
+  }
+  return false;
+}
+
+bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
+  unsigned Reg;
+  // TODO: Parse register flags.
+  if (parseRegister(Reg))
+    return true;
+  lex();
+  // TODO: Parse subregister.
+  Dest = MachineOperand::CreateReg(Reg, IsDef);
+  return false;
+}
+
+bool MIParser::parseMachineOperand(MachineOperand &Dest) {
+  switch (Token.kind()) {
+  case MIToken::NamedRegister:
+    return parseRegisterOperand(Dest);
+  case MIToken::Error:
+    return true;
+  default:
+    // TODO: parse the other machine operands.
+    return error("expected a machine operand");
+  }
   return false;
 }
 
@@ -129,6 +228,29 @@
   return false;
 }
 
+void MIParser::initNames2Regs() {
+  if (!Names2Regs.empty())
+    return;
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
+    bool WasInserted =
+        Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
+            .second;
+    (void)WasInserted;
+    assert(WasInserted && "Expected registers to be unique case-insensitively");
+  }
+}
+
+bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
+  initNames2Regs();
+  auto RegInfo = Names2Regs.find(RegName);
+  if (RegInfo == Names2Regs.end())
+    return true;
+  Reg = RegInfo->getValue();
+  return false;
+}
+
 MachineInstr *llvm::parseMachineInstr(SourceMgr &SM, MachineFunction &MF,
                                       StringRef Src, SMDiagnostic &Error) {
   return MIParser(SM, MF, Error, Src).parse();
Index: lib/CodeGen/MIRPrinter.cpp
===================================================================
--- lib/CodeGen/MIRPrinter.cpp
+++ lib/CodeGen/MIRPrinter.cpp
@@ -50,6 +50,7 @@
   MIPrinter(raw_ostream &OS) : OS(OS) {}
 
   void print(const MachineInstr &MI);
+  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
 };
 
 } // end anonymous namespace
@@ -110,11 +111,58 @@
 
 void MIPrinter::print(const MachineInstr &MI) {
   const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
+  const auto *TRI = SubTarget.getRegisterInfo();
+  assert(TRI && "Expected target register info");
   const auto *TII = SubTarget.getInstrInfo();
   assert(TII && "Expected target instruction info");
 
+  unsigned I = 0, E = MI.getNumOperands();
+  for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
+         !MI.getOperand(I).isImplicit();
+       ++I) {
+    if (I)
+      OS << ", ";
+    print(MI.getOperand(I), TRI);
+  }
+
+  if (I)
+    OS << " = ";
   OS << TII->getName(MI.getOpcode());
-  // TODO: Print the instruction flags, machine operands, machine mem operands.
+  // TODO: Print the instruction flags, machine mem operands.
+  if (I < E)
+    OS << ' ';
+
+  bool NeedComma = false;
+  for (; I < E; ++I) {
+    if (NeedComma)
+      OS << ", ";
+    print(MI.getOperand(I), TRI);
+    NeedComma = true;
+  }
+}
+
+static void printReg(unsigned Reg, raw_ostream &OS,
+                     const TargetRegisterInfo *TRI) {
+  // TODO: Print Stack Slots.
+  // TODO: Print no register.
+  // TODO: Print virtual registers.
+  if (Reg < TRI->getNumRegs())
+    OS << '%' << StringRef(TRI->getName(Reg)).lower();
+  else
+    llvm_unreachable("Can't print this kind of register yet");
+}
+
+void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
+  switch (Op.getType()) {
+  case MachineOperand::MO_Register:
+    // TODO: Print register flags.
+    printReg(Op.getReg(), OS, TRI);
+    // TODO: Print sub register.
+    break;
+  default:
+    // TODO: Print the other machine operands.
+    llvm_unreachable("Can't print this machine operand at the moment");
+  }
 }
 
 void llvm::printMIR(raw_ostream &OS, const Module &M) {
Index: test/CodeGen/MIR/X86/expected-machine-operand.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/X86/expected-machine-operand.mir
@@ -0,0 +1,20 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name: foo
+body:
+ - name: entry
+   instructions:
+     # CHECK: 1:16: expected a machine operand
+     - '%eax = XOR32rr ='
+     - 'RETQ %eax'
+...
+
Index: test/CodeGen/MIR/X86/missing-comma.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/X86/missing-comma.mir
@@ -0,0 +1,20 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name: foo
+body:
+ - name: entry
+   instructions:
+     # CHECK: 1:21: expected ',' before the next machine operand
+     - '%eax = XOR32rr %eax %eflags'
+     - 'RETQ %eax'
+...
+
Index: test/CodeGen/MIR/X86/named-registers.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/X86/named-registers.mir
@@ -0,0 +1,22 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses X86 registers correctly.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+# CHECK: name: foo
+name: foo
+body:
+ - name: entry
+   instructions:
+     # CHECK: - '%eax = MOV32r0
+     # CHECK-NEXT: - 'RETQ %eax
+     - '%eax = MOV32r0'
+     - 'RETQ %eax'
+...
Index: test/CodeGen/MIR/X86/unknown-register.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/X86/unknown-register.mir
@@ -0,0 +1,21 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an unknown register is
+# encountered.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name: foo
+body:
+ - name: entry
+   instructions:
+     # CHECK: 1:1: unknown register name 'xax'
+     - '%xax = MOV32r0'
+     - 'RETQ %xax'
+...