Index: lib/CodeGen/MIRParser/MILexer.h =================================================================== --- lib/CodeGen/MIRParser/MILexer.h +++ lib/CodeGen/MIRParser/MILexer.h @@ -26,8 +26,13 @@ Eof, Error, + // Tokens with no info. + comma, + equal, + // Identifier tokens - Identifier + Identifier, + NamedRegister }; } // end namespace mitok @@ -58,6 +63,8 @@ SMLoc getLoc() const { return CurLoc; } + bool isRegister() const { return CurKind == mitok::NamedRegister; } + mitok::Kind getKind() const { return CurKind; } StringRef getStringValue() const { return StrVal; } @@ -69,6 +76,7 @@ void skipWhitespace(); mitok::Kind lexToken(); mitok::Kind lexIdentifier(); + mitok::Kind lexPercent(); }; } // end namespace llvm Index: lib/CodeGen/MIRParser/MILexer.cpp =================================================================== --- lib/CodeGen/MIRParser/MILexer.cpp +++ lib/CodeGen/MIRParser/MILexer.cpp @@ -55,6 +55,16 @@ return mitok::Identifier; } +mitok::Kind MILexer::lexPercent() { + auto Start = CurPtr; + while (!isEOF() && isIdentifierChar(*CurPtr)) + ++CurPtr; + StrVal = StringRef(Start, CurPtr - Start); + if (StrVal.empty()) + return error(Twine("expected a name after '%'")); + return mitok::NamedRegister; +} + mitok::Kind MILexer::lexToken() { skipWhitespace(); CurLoc = SMLoc::getFromPointer(CurPtr); @@ -62,7 +72,16 @@ return mitok::Eof; char CurChar = *CurPtr++; - if (isalpha(CurChar) || CurChar == '_') - return lexIdentifier(); - return error(Twine("unexpected character '") + Twine(CurChar) + "'"); + switch (CurChar) { + case ',': + return mitok::comma; + case '=': + return mitok::equal; + case '%': + return lexPercent(); + default: + if (isalpha(CurChar) || CurChar == '_') + return lexIdentifier(); + return error(Twine("unexpected character '") + Twine(CurChar) + "'"); + } } Index: lib/CodeGen/MIRParser/MIParser.cpp =================================================================== --- lib/CodeGen/MIRParser/MIParser.cpp +++ 
lib/CodeGen/MIRParser/MIParser.cpp @@ -31,6 +31,8 @@ MILexer Lex; /// Maps from instruction names to op codes. StringMap<unsigned> Names2InstrOpCodes; + /// Maps from register names to registers. + StringMap<unsigned> Names2Regs; public: MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, @@ -43,6 +45,12 @@ MachineInstr *parse(); + bool parseRegister(unsigned &Reg); + + bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); + + bool parseMachineOperand(MachineOperand &Dest); + private: void initNames2InstrOpCodes(); @@ -51,6 +59,12 @@ bool parseInstrName(StringRef InstrName, unsigned &OpCode); bool parseInstruction(unsigned &OpCode); + + void initNames2Regs(); + + /// Try to convert a register name to a register number. Return true if the + /// register name is invalid. + bool getRegisterByName(StringRef RegName, unsigned &Reg); }; } // end anonymous namespace @@ -67,13 +81,61 @@ MachineInstr *MIParser::parse() { Lex.lex(); + // Parse any register operands before '=' + // TODO: Allow parsing of multiple operands before '=' + MachineOperand MO = MachineOperand::CreateImm(0); + SmallVector<MachineOperand, 8> Operands; + if (Lex.isRegister()) { + if (parseRegisterOperand(MO, /*IsDef=*/true)) + return nullptr; + Operands.push_back(MO); + if (Lex.getKind() != mitok::equal) { + error("expected '='"); + return nullptr; + } + Lex.lex(); + } + unsigned OpCode; if (Lex.isError() || parseInstruction(OpCode)) return nullptr; - // TODO: Parse the rest of instruction - machine operands, etc. + // TODO: Parse the instruction flags and memory operands. + + // Parse the remaining machine operands. 
+ while (Lex.getKind() != mitok::Eof) { + if (parseMachineOperand(MO)) + return nullptr; + Operands.push_back(MO); + if (Lex.getKind() == mitok::Eof) + break; + if (Lex.getKind() != mitok::comma) { + error("expected ',' before the next machine operand"); + return nullptr; + } + Lex.lex(); + } + const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); - auto *MI = MF.CreateMachineInstr(MCID, DebugLoc()); + + // Verify machine operands + if (!MCID.isVariadic()) { + for (size_t I = 0, E = Operands.size(); I < E; ++I) { + if (I < MCID.getNumOperands()) + continue; + // Mark this register as implicit to prevent an assertion when it's added + // to an instruction. This is a temporary workaround until the implicit + // register flag can be parsed. + Operands[I].setImplicit(); + } + } + + // TODO: Determine the implicit behaviour when implicit register flags are + // parsed. + auto *MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); + for (const auto &Operand : Operands) { + MI->addOperand(MF, Operand); + } return MI; } @@ -83,6 +145,45 @@ StringRef InstrName = Lex.getStringValue(); if (parseInstrName(InstrName, OpCode)) return error(Twine("unknown machine instruction name '") + InstrName + "'"); + Lex.lex(); + return false; +} + +bool MIParser::parseRegister(unsigned &Reg) { + switch (Lex.getKind()) { + case mitok::NamedRegister: + if (getRegisterByName(Lex.getStringValue(), Reg)) + return error(Twine("unknown register name '") + Lex.getStringValue() + + "'"); + break; + // TODO: Parse other register kinds. + default: + llvm_unreachable("The current token should be a register"); + } + return false; +} + +bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { + unsigned Reg; + // TODO: Parse register flags + if (parseRegister(Reg)) + return true; + Lex.lex(); + // TODO: Parse subregister. 
+ Dest = MachineOperand::CreateReg(Reg, IsDef); + return false; +} + +bool MIParser::parseMachineOperand(MachineOperand &Dest) { + switch (Lex.getKind()) { + case mitok::NamedRegister: + return parseRegisterOperand(Dest); + case mitok::Error: + return true; + default: + // TODO: parse the other machine operands. + return error("expected a machine operand"); + } return false; } @@ -105,6 +206,24 @@ return false; } +void MIParser::initNames2Regs() { + if (!Names2Regs.empty()) + return; + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) + Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)); +} + +bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) { + initNames2Regs(); + auto RegInfo = Names2Regs.find(RegName); + if (RegInfo == Names2Regs.end()) + return true; + Reg = RegInfo->getValue(); + return false; +} + MachineInstr *llvm::parseMachineInstr(SourceMgr &SM, MachineFunction &MF, StringRef Src, SMDiagnostic &Error) { return MIParser(SM, MF, Error, Src).parse(); Index: lib/CodeGen/MIRPrinter.cpp =================================================================== --- lib/CodeGen/MIRPrinter.cpp +++ lib/CodeGen/MIRPrinter.cpp @@ -50,6 +50,8 @@ MIPrinter(raw_ostream &OS) : OS(OS) {} void print(const MachineInstr &MI); + + void print(const MachineOperand &Op, const TargetRegisterInfo *TRI); }; } // end anonymous namespace @@ -110,10 +112,57 @@ void MIPrinter::print(const MachineInstr &MI) { const auto &SubTarget = MI.getParent()->getParent()->getSubtarget(); + const auto *TRI = SubTarget.getRegisterInfo(); + assert(TRI && "Expected target register info"); const auto *TII = SubTarget.getInstrInfo(); + unsigned I = 0, E = MI.getNumOperands(); + for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() && + !MI.getOperand(I).isImplicit(); + ++I) { + if (I) + OS << ", "; + print(MI.getOperand(I), TRI); + } + + if (I) 
+ OS << " = "; OS << StringRef(TII->getName(MI.getOpcode())).lower(); - // TODO: Print the instruction flags, machine operands, machine mem operands. + // TODO: Print the instruction flags, machine mem operands. + if (I < E) + OS << ' '; + + bool NeedComma = false; + for (; I < E; ++I) { + if (NeedComma) + OS << ", "; + print(MI.getOperand(I), TRI); + NeedComma = true; + } +} + +static void printReg(unsigned Reg, raw_ostream &OS, + const TargetRegisterInfo *TRI) { + // TODO: Print Stack Slots + // TODO: Print no register + // TODO: Print virtual registers + if (Reg < TRI->getNumRegs()) + OS << '%' << StringRef(TRI->getName(Reg)).lower(); + else + llvm_unreachable("Can't print this kind of register yet"); +} + +void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) { + switch (Op.getType()) { + case MachineOperand::MO_Register: + // TODO: Print register flags. + printReg(Op.getReg(), OS, TRI); + // TODO: Print sub register. + break; + default: + // TODO: Print the other machine operands. + llvm_unreachable("Can't print this machine operand at the moment"); + } } void llvm::printMIR(raw_ostream &OS, const Module &M) { Index: test/CodeGen/MIR/X86/expected-machine-operand.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/X86/expected-machine-operand.mir @@ -0,0 +1,20 @@ +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s + +--- | + + define i32 @foo() { + entry: + ret i32 0 + } + +... +--- +name: foo +body: + - name: entry + instructions: + # CHECK: 1:16: expected a machine operand + - '%eax = xor32rr =' + - 'retq %eax' +... 
+ Index: test/CodeGen/MIR/X86/missing-comma.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/X86/missing-comma.mir @@ -0,0 +1,20 @@ +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s + +--- | + + define i32 @foo() { + entry: + ret i32 0 + } + +... +--- +name: foo +body: + - name: entry + instructions: + # CHECK: 1:21: expected ',' before the next machine operand + - '%eax = xor32rr %eax %eflags' + - 'retq %eax' +... + Index: test/CodeGen/MIR/X86/named-registers.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/X86/named-registers.mir @@ -0,0 +1,22 @@ +# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s +# This test ensures that the MIR parser parses X86 registers correctly. + +--- | + + define i32 @foo() { + entry: + ret i32 0 + } + +... +--- +# CHECK: name: foo +name: foo +body: + - name: entry + instructions: + # CHECK: - '%eax = mov32r0 + # CHECK-NEXT: - 'retq %eax + - '%eax = mov32r0' + - 'retq %eax' +... Index: test/CodeGen/MIR/X86/unknown-register.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/X86/unknown-register.mir @@ -0,0 +1,21 @@ +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that an error is reported when an unknown register is +# encountered. + +--- | + + define i32 @foo() { + entry: + ret i32 0 + } + +... +--- +name: foo +body: + - name: entry + instructions: + # CHECK: 1:1: unknown register name 'xax' + - '%xax = mov32r0' + - 'retq %xax' +...