Skip to content

Commit 91370c5

Browse files
committedJun 22, 2015
MIR Serialization: Introduce a lexer for machine instructions.
This commit adds a function that tokenizes the string containing the machine instruction. This commit also adds a struct called 'MIToken' which is used to represent the lexer's tokens. Reviewers: Sean Silva Differential Revision: http://reviews.llvm.org/D10521 llvm-svn: 240323
1 parent f228550 commit 91370c5

File tree

7 files changed

+230
-12
lines changed

7 files changed

+230
-12
lines changed
 

‎llvm/lib/CodeGen/MIRParser/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
add_llvm_library(LLVMMIRParser
2+
MILexer.cpp
23
MIParser.cpp
34
MIRParser.cpp
45
)
+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file implements the lexing of machine instructions.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "MILexer.h"
15+
#include "llvm/ADT/Twine.h"
16+
#include <cctype>
17+
18+
using namespace llvm;
19+
20+
namespace {
21+
22+
/// This class provides a way to iterate and get characters from the source
23+
/// string.
24+
class Cursor {
25+
const char *Ptr;
26+
const char *End;
27+
28+
public:
29+
explicit Cursor(StringRef Str) {
30+
Ptr = Str.data();
31+
End = Ptr + Str.size();
32+
}
33+
34+
bool isEOF() const { return Ptr == End; }
35+
36+
char peek() const { return isEOF() ? 0 : *Ptr; }
37+
38+
void advance() { ++Ptr; }
39+
40+
StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
41+
42+
StringRef upto(Cursor C) const {
43+
assert(C.Ptr >= Ptr && C.Ptr <= End);
44+
return StringRef(Ptr, C.Ptr - Ptr);
45+
}
46+
47+
StringRef::iterator location() const { return Ptr; }
48+
};
49+
50+
} // end anonymous namespace
51+
52+
/// Skip the leading whitespace characters and return the updated cursor.
53+
static Cursor skipWhitespace(Cursor C) {
54+
while (isspace(C.peek()))
55+
C.advance();
56+
return C;
57+
}
58+
59+
/// Return true if \p C may appear inside an identifier token: a letter, a
/// digit, or one of '_', '-' and '.'.
static bool isIdentifierChar(char C) {
  // The <cctype> classifiers have undefined behaviour for negative arguments
  // other than EOF, so convert the (possibly signed) char to unsigned char
  // before classifying it.
  const unsigned char UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.';
}
62+
63+
/// Lex an identifier that begins at \p C.
///
/// Consumes the longest run of identifier characters, stores an Identifier
/// token covering that run in \p Token, and returns the cursor positioned just
/// after it.
static Cursor lexIdentifier(Cursor C, MIToken &Token) {
  const Cursor Start = C;
  for (; isIdentifierChar(C.peek()); C.advance()) {
    // The loop header does all the work: keep consuming identifier characters.
  }
  const StringRef Spelling = Start.upto(C);
  Token = MIToken(MIToken::Identifier, Spelling);
  return C;
}
70+
71+
/// Lex a single token from the front of \p Source and return the rest.
///
/// Leading whitespace is skipped first. Then:
///  - at the end of the input, \p Token becomes an Eof token;
///  - if the next character is a letter or '_', an identifier is lexed;
///  - otherwise \p Token becomes an Error token and \p ErrorCallback is
///    invoked with the location of the offending character and a message.
///
/// On the error path the offending character is not consumed, so the returned
/// string still begins with it.
StringRef llvm::lexMIToken(
    StringRef Source, MIToken &Token,
    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
  auto C = skipWhitespace(Cursor(Source));
  if (C.isEOF()) {
    Token = MIToken(MIToken::Eof, C.remaining());
    return C.remaining();
  }

  auto Char = C.peek();
  // The <cctype> classifiers have undefined behaviour for negative arguments
  // other than EOF; a non-ASCII byte in a signed char would be negative, so
  // widen through unsigned char before classifying.
  if (isalpha(static_cast<unsigned char>(Char)) || Char == '_')
    return lexIdentifier(C, Token).remaining();
  Token = MIToken(MIToken::Error, C.remaining());
  ErrorCallback(C.location(),
                Twine("unexpected character '") + Twine(Char) + "'");
  return C.remaining();
}

‎llvm/lib/CodeGen/MIRParser/MILexer.h

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//===- MILexer.h - Lexer for machine instructions -------------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file declares the function that lexes the machine instruction source
11+
// string.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16+
#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
17+
18+
#include "llvm/ADT/StringRef.h"
19+
#include "llvm/ADT/STLExtras.h"
20+
#include <functional>
21+
22+
namespace llvm {
23+
24+
class Twine;
25+
26+
/// A token produced by the machine instruction lexer.
27+
struct MIToken {
28+
enum TokenKind {
29+
// Markers
30+
Eof,
31+
Error,
32+
33+
// Identifier tokens
34+
Identifier
35+
};
36+
37+
private:
38+
TokenKind Kind;
39+
StringRef Range;
40+
41+
public:
42+
MIToken(TokenKind Kind, StringRef Range) : Kind(Kind), Range(Range) {}
43+
44+
TokenKind kind() const { return Kind; }
45+
46+
bool isError() const { return Kind == Error; }
47+
48+
bool is(TokenKind K) const { return Kind == K; }
49+
50+
bool isNot(TokenKind K) const { return Kind != K; }
51+
52+
StringRef::iterator location() const { return Range.begin(); }
53+
54+
StringRef stringValue() const { return Range; }
55+
};
56+
57+
/// Consume a single machine instruction token in the given source and return
58+
/// the remaining source string.
59+
StringRef lexMIToken(
60+
StringRef Source, MIToken &Token,
61+
function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
62+
63+
} // end namespace llvm
64+
65+
#endif

‎llvm/lib/CodeGen/MIRParser/MIParser.cpp

+40-11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "MIParser.h"
15+
#include "MILexer.h"
1516
#include "llvm/ADT/StringMap.h"
1617
#include "llvm/CodeGen/MachineBasicBlock.h"
1718
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,19 +30,27 @@ class MIParser {
2930
SourceMgr &SM;
3031
MachineFunction &MF;
3132
SMDiagnostic &Error;
32-
StringRef Source;
33+
StringRef Source, CurrentSource;
34+
MIToken Token;
3335
/// Maps from instruction names to op codes.
3436
StringMap<unsigned> Names2InstrOpCodes;
3537

3638
public:
3739
MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
3840
StringRef Source);
3941

42+
void lex();
43+
4044
/// Report an error at the current location with the given message.
4145
///
4246
/// This function always returns true.
4347
bool error(const Twine &Msg);
4448

49+
/// Report an error at the given location with the given message.
50+
///
51+
/// This function always returns true.
52+
bool error(StringRef::iterator Loc, const Twine &Msg);
53+
4554
MachineInstr *parse();
4655

4756
private:
@@ -50,38 +59,58 @@ class MIParser {
5059
/// Try to convert an instruction name to an opcode. Return true if the
5160
/// instruction name is invalid.
5261
bool parseInstrName(StringRef InstrName, unsigned &OpCode);
62+
63+
bool parseInstruction(unsigned &OpCode);
5364
};
5465

5566
} // end anonymous namespace
5667

5768
MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
5869
StringRef Source)
59-
: SM(SM), MF(MF), Error(Error), Source(Source) {}
70+
: SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
71+
Token(MIToken::Error, StringRef()) {}
72+
73+
void MIParser::lex() {
74+
CurrentSource = lexMIToken(
75+
CurrentSource, Token,
76+
[this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
77+
}
78+
79+
// Report \p Msg at the location of the current token by forwarding to the
// overload that takes an explicit location.
bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
6080

61-
bool MIParser::error(const Twine &Msg) {
81+
bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
6282
// TODO: Get the proper location in the MIR file, not just a location inside
6383
// the string.
64-
Error =
65-
SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID())
66-
->getBufferIdentifier(),
67-
1, 0, SourceMgr::DK_Error, Msg.str(), Source, None, None);
84+
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
85+
Error = SMDiagnostic(
86+
SM, SMLoc(),
87+
SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
88+
Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
6889
return true;
6990
}
7091

7192
MachineInstr *MIParser::parse() {
72-
StringRef InstrName = Source;
93+
lex();
94+
7395
unsigned OpCode;
74-
if (parseInstrName(InstrName, OpCode)) {
75-
error(Twine("unknown machine instruction name '") + InstrName + "'");
96+
if (Token.isError() || parseInstruction(OpCode))
7697
return nullptr;
77-
}
7898

7999
// TODO: Parse the rest of instruction - machine operands, etc.
80100
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
81101
auto *MI = MF.CreateMachineInstr(MCID, DebugLoc());
82102
return MI;
83103
}
84104

105+
bool MIParser::parseInstruction(unsigned &OpCode) {
106+
if (Token.isNot(MIToken::Identifier))
107+
return error("expected a machine instruction");
108+
StringRef InstrName = Token.stringValue();
109+
if (parseInstrName(InstrName, OpCode))
110+
return error(Twine("unknown machine instruction name '") + InstrName + "'");
111+
return false;
112+
}
113+
85114
void MIParser::initNames2InstrOpCodes() {
86115
if (!Names2InstrOpCodes.empty())
87116
return;

‎llvm/test/CodeGen/MIR/X86/machine-instructions.mir

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ body:
2020
# CHECK: - IMUL32rri8
2121
# CHECK-NEXT: - RETQ
2222
- IMUL32rri8
23-
- RETQ
23+
- ' RETQ '
2424
...
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
2+
3+
--- |
4+
5+
define void @foo() {
6+
entry:
7+
ret void
8+
}
9+
10+
...
11+
---
12+
name: foo
13+
body:
14+
- name: entry
15+
instructions:
16+
# CHECK: 1:1: expected a machine instruction
17+
- ''
18+
...
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
2+
3+
--- |
4+
5+
define void @foo() {
6+
entry:
7+
ret void
8+
}
9+
10+
...
11+
---
12+
name: foo
13+
body:
14+
- name: entry
15+
instructions:
16+
# CHECK: 1:1: unexpected character '`'
17+
- '` RETQ'
18+
...

0 commit comments

Comments
 (0)
Please sign in to comment.