Index: lib/CodeGen/MIRParser/CMakeLists.txt =================================================================== --- lib/CodeGen/MIRParser/CMakeLists.txt +++ lib/CodeGen/MIRParser/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMIRParser + MILexer.cpp MIParser.cpp MIRParser.cpp ) Index: lib/CodeGen/MIRParser/MILexer.h =================================================================== --- /dev/null +++ lib/CodeGen/MIRParser/MILexer.h @@ -0,0 +1,76 @@ +//===- MILexer.h - Lexer for machine instructions -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the class that lexes the machine instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H +#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/SourceMgr.h" + +namespace llvm { + +namespace mitok { + +enum Kind { + // Markers + Eof, + Error, + + // Identifier tokens + Identifier +}; + +} // end namespace mitok + +class MILexer { + SourceMgr &SM; + SMDiagnostic &Error; + const char *BufStart; + const char *CurPtr; + const char *BufEnd; + /// The location of the current token. + SMLoc CurLoc; + mitok::Kind CurKind; + StringRef StrVal; + +public: + MILexer(SourceMgr &SM, SMDiagnostic &Error, StringRef Source); + + /// Report an error at the current location with the given message, and return + /// mitok::Error. + mitok::Kind error(const Twine &Msg); + + /// Report an error at the given location with the given message, and return + /// mitok::Error. + mitok::Kind error(SMLoc Loc, const Twine &Msg); + + bool isError() const { return CurKind == mitok::Error; } + + SMLoc getLoc() const { return CurLoc; } + + mitok::Kind getKind() const { return CurKind; } + + StringRef getStringValue() const { return StrVal; } + + void lex() { CurKind = lexToken(); } + +private: + bool isEOF() const; + void skipWhitespace(); + mitok::Kind lexToken(); + mitok::Kind lexIdentifier(); +}; + +} // end namespace llvm + +#endif Index: lib/CodeGen/MIRParser/MILexer.cpp =================================================================== --- /dev/null +++ lib/CodeGen/MIRParser/MILexer.cpp @@ -0,0 +1,68 @@ +//===- MILexer.cpp - Machine instructions lexer implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lexing of machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "MILexer.h" +#include + +using namespace llvm; + +static bool isIdentifierChar(char C) { + return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; +} + +MILexer::MILexer(SourceMgr &SM, SMDiagnostic &Error, StringRef Source) + : SM(SM), Error(Error) { + CurPtr = BufStart = Source.data(); + BufEnd = CurPtr + Source.size(); +} + +mitok::Kind MILexer::error(const Twine &Msg) { return error(getLoc(), Msg); } + +mitok::Kind MILexer::error(SMLoc Loc, const Twine &Msg) { + // TODO: Get the proper location in the MIR file, not just a location inside + // the string. + const char *Ptr = Loc.getPointer(); + assert(Ptr >= BufStart && Ptr <= BufEnd); + Error = SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID()) + ->getBufferIdentifier(), + 1, Ptr - BufStart, SourceMgr::DK_Error, Msg.str(), + StringRef(BufStart, BufEnd - BufStart), None, None); + return mitok::Error; +} + +bool MILexer::isEOF() const { return CurPtr >= BufEnd; } + +void MILexer::skipWhitespace() { + while (!isEOF() && isspace(*CurPtr)) + ++CurPtr; +} + +mitok::Kind MILexer::lexIdentifier() { + auto Start = CurPtr - 1; + while (!isEOF() && isIdentifierChar(*CurPtr)) + ++CurPtr; + StrVal = StringRef(Start, CurPtr - Start); + return mitok::Identifier; +} + +mitok::Kind MILexer::lexToken() { + skipWhitespace(); + CurLoc = SMLoc::getFromPointer(CurPtr); + if (isEOF()) + return mitok::Eof; + + char CurChar = *CurPtr++; + if (isalpha(CurChar) || CurChar == '_') + return lexIdentifier(); + return error(Twine("unexpected character '") + Twine(CurChar) + "'"); +} Index: lib/CodeGen/MIRParser/MIParser.cpp =================================================================== --- lib/CodeGen/MIRParser/MIParser.cpp +++ lib/CodeGen/MIRParser/MIParser.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MIParser.h" +#include "MILexer.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,10 +27,8 @@ namespace { class MIParser { - SourceMgr &SM; MachineFunction &MF; - SMDiagnostic &Error; - StringRef Source; + MILexer Lex; /// Maps from instruction names to op codes. StringMap Names2InstrOpCodes; @@ -50,31 +49,27 @@ /// Try to convert an instruction name to an opcode. Return true if the /// instruction name is invalid. bool parseInstrName(StringRef InstrName, unsigned &OpCode); + + bool parseInstruction(unsigned &OpCode); }; } // end anonymous namespace MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, StringRef Source) - : SM(SM), MF(MF), Error(Error), Source(Source) {} + : MF(MF), Lex(SM, Error, Source) {} bool MIParser::error(const Twine &Msg) { - // TODO: Get the proper location in the MIR file, not just a location inside - // the string. - Error = - SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID()) - ->getBufferIdentifier(), - 1, 0, SourceMgr::DK_Error, Msg.str(), Source, None, None); + Lex.error(Msg); return true; } MachineInstr *MIParser::parse() { - StringRef InstrName = Source; + Lex.lex(); + unsigned OpCode; - if (parseInstrName(InstrName, OpCode)) { - error(Twine("unknown machine instruction name '") + InstrName + "'"); + if (Lex.isError() || parseInstruction(OpCode)) return nullptr; - } // TODO: Parse the rest of instruction - machine operands, etc. const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); @@ -82,6 +77,15 @@ return MI; } +bool MIParser::parseInstruction(unsigned &OpCode) { + if (Lex.getKind() != mitok::Identifier) + return error("expected a machine instruction"); + StringRef InstrName = Lex.getStringValue(); + if (parseInstrName(InstrName, OpCode)) + return error(Twine("unknown machine instruction name '") + InstrName + "'"); + return false; +} + void MIParser::initNames2InstrOpCodes() { if (!Names2InstrOpCodes.empty()) return; Index: test/CodeGen/MIR/X86/machine-instructions.mir =================================================================== --- test/CodeGen/MIR/X86/machine-instructions.mir +++ test/CodeGen/MIR/X86/machine-instructions.mir @@ -20,5 +20,5 @@ # CHECK: - imul32rri8 # CHECK-NEXT: - retq - imul32rri8 - - retq + - ' retq ' ... Index: test/CodeGen/MIR/X86/missing-instruction.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/X86/missing-instruction.mir @@ -0,0 +1,18 @@ +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s + +--- | + + define void @foo() { + entry: + ret void + } + +... +--- +name: foo +body: + - name: entry + instructions: + # CHECK: 1:1: expected a machine instruction + - '' +... Index: test/CodeGen/MIR/X86/unrecognized-character.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/X86/unrecognized-character.mir @@ -0,0 +1,18 @@ +# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s + +--- | + + define void @foo() { + entry: + ret void + } + +... +--- +name: foo +body: + - name: entry + instructions: + # CHECK: 1:1: unexpected character '`' + - '` retq' +...