Index: lib/CodeGen/MIRParser/MILexer.h
===================================================================
--- lib/CodeGen/MIRParser/MILexer.h
+++ lib/CodeGen/MIRParser/MILexer.h
@@ -39,6 +39,8 @@
     // Identifier tokens
     Identifier,
     NamedRegister,
+    NamedMachineBasicBlock,
+    MachineBasicBlock,
 
     // Other tokens
     IntegerLiteral
@@ -69,9 +71,27 @@
 
   StringRef::iterator location() const { return Range.begin(); }
 
-  StringRef stringValue() const { return Range; }
+  /// Return the token's text. For named registers and named machine basic
+  /// blocks the leading '%' or '%bb.' prefix is dropped.
+  StringRef stringValue() const {
+    switch (Kind) {
+    case NamedRegister:
+      return Range.drop_front(1); // Drop the '%'
+    case NamedMachineBasicBlock:
+      return Range.drop_front(4); // Drop '%bb.'
+    default:
+      break;
+    }
+    return Range;
+  }
 
   const APSInt &integerValue() const { return IntVal; }
+
+  /// Return true if this token carries an integer value, i.e. it is an
+  /// integer literal or a numbered machine basic block reference.
+  bool hasIntegerValue() const {
+    return Kind == IntegerLiteral || Kind == MachineBasicBlock;
+  }
 };
 
 /// Consume a single machine instruction token in the given source and return
Index: lib/CodeGen/MIRParser/MILexer.cpp
===================================================================
--- lib/CodeGen/MIRParser/MILexer.cpp
+++ lib/CodeGen/MIRParser/MILexer.cpp
@@ -35,7 +35,7 @@
 
   char peek(unsigned I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
 
-  void advance() { ++Ptr; }
+  void advance(unsigned I = 1) { Ptr += I; }
 
   StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
 
@@ -70,7 +70,29 @@
   return C;
 }
 
+/// Lex a machine basic block reference that starts with '%bb.': a numbered
+/// reference when a digit follows the prefix, a named reference otherwise.
+static Cursor lexMachineBasicBlock(Cursor C, MIToken &Token) {
+  auto Range = C;
+  C.advance(4); // Skip '%bb.'
+  // isdigit() has undefined behavior for negative chars, hence the casts.
+  if (!isdigit(static_cast<unsigned char>(C.peek()))) {
+    while (isIdentifierChar(C.peek()))
+      C.advance();
+    Token = MIToken(MIToken::NamedMachineBasicBlock, Range.upto(C));
+    return C;
+  }
+  auto NumberRange = C;
+  while (isdigit(static_cast<unsigned char>(C.peek())))
+    C.advance();
+  Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C),
+                  APSInt(NumberRange.upto(C)));
+  return C;
+}
+
 static Cursor lexPercent(Cursor C, MIToken &Token) {
+  if (C.remaining().startswith("%bb."))
+    return lexMachineBasicBlock(C, Token);
   auto Range = C;
   C.advance(); // Skip '%'
   while (isIdentifierChar(C.peek()))
Index: lib/CodeGen/MIRParser/MIParser.h
===================================================================
--- lib/CodeGen/MIRParser/MIParser.h
+++ lib/CodeGen/MIRParser/MIParser.h
@@ -15,15 +15,18 @@
 #define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
 
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
 
+class MachineBasicBlock;
 class MachineInstr;
 class MachineFunction;
 class SMDiagnostic;
 class SourceMgr;
 
 MachineInstr *parseMachineInstr(SourceMgr &SM, MachineFunction &MF,
+                                const StringMap<MachineBasicBlock *> &NamedMBBs,
                                 StringRef Src, SMDiagnostic &Error);
 
 } // end namespace llvm
Index: lib/CodeGen/MIRParser/MIParser.cpp
===================================================================
--- lib/CodeGen/MIRParser/MIParser.cpp
+++ lib/CodeGen/MIRParser/MIParser.cpp
@@ -32,6 +32,8 @@
   SMDiagnostic &Error;
   StringRef Source, CurrentSource;
   MIToken Token;
+  /// Maps from basic block names to MBBs.
+  const StringMap<MachineBasicBlock *> &NamedMBBs;
   /// Maps from instruction names to op codes.
   StringMap<unsigned> Names2InstrOpCodes;
   /// Maps from register names to registers.
@@ -39,6 +41,7 @@
 
 public:
   MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
+           const StringMap<MachineBasicBlock *> &NamedMBBs,
            StringRef Source);
 
   void lex();
@@ -58,9 +61,15 @@
   bool parseRegister(unsigned &Reg);
   bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
   bool parseImmediateOperand(MachineOperand &Dest);
+  bool parseMBBOperand(MachineOperand &Dest);
   bool parseMachineOperand(MachineOperand &Dest);
 
 private:
+  /// Convert the integer literal in the current token into an unsigned integer.
+  ///
+  /// Return true if an error occurred.
+  bool getUnsigned(unsigned &Result);
+
   void initNames2InstrOpCodes();
 
   /// Try to convert an instruction name to an opcode. Return true if the
@@ -79,9 +88,10 @@
 } // end anonymous namespace
 
 MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
+                   const StringMap<MachineBasicBlock *> &NamedMBBs,
                    StringRef Source)
     : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
-      Token(MIToken::Error, StringRef()) {}
+      Token(MIToken::Error, StringRef()), NamedMBBs(NamedMBBs) {}
 
 void MIParser::lex() {
   CurrentSource = lexMIToken(
@@ -178,7 +188,7 @@
     Reg = 0;
     break;
   case MIToken::NamedRegister: {
-    StringRef Name = Token.stringValue().drop_front(1); // Drop the '%'
+    StringRef Name = Token.stringValue();
     if (getRegisterByName(Name, Reg))
       return error(Twine("unknown register name '") + Name + "'");
     break;
@@ -212,6 +222,49 @@
   return false;
 }
 
+bool MIParser::getUnsigned(unsigned &Result) {
+  assert(Token.hasIntegerValue() && "Expected a token with an integer value");
+  const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
+  // getLimitedValue() clamps to Limit, which is one past the largest unsigned
+  // value, so reading Limit back means the literal does not fit.
+  uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
+  if (Val64 == Limit)
+    return error("expected 32-bit integer (too large)");
+  Result = Val64;
+  return false;
+}
+
+/// Parse a machine basic block reference, either by name ('%bb.<name>') or by
+/// number ('%bb.<number>'), and store the resulting MBB operand in Dest.
+///
+/// Return true if an error occurred.
+bool MIParser::parseMBBOperand(MachineOperand &Dest) {
+  switch (Token.kind()) {
+  case MIToken::NamedMachineBasicBlock: {
+    auto Name = Token.stringValue();
+    auto Result = NamedMBBs.find(Name);
+    if (Result == NamedMBBs.end())
+      return error(Twine("unknown machine basic block name '") + Name + "'");
+    Dest = MachineOperand::CreateMBB(Result->getValue());
+    break;
+  }
+  case MIToken::MachineBasicBlock: {
+    unsigned BlockIdx;
+    if (getUnsigned(BlockIdx))
+      return true;
+    if (BlockIdx >= MF.size())
+      return error(Twine("unknown machine basic block #") + Twine(BlockIdx) +
+                   "; function has only " + Twine(MF.size()) + " blocks");
+    Dest = MachineOperand::CreateMBB(MF.getBlockNumbered(BlockIdx));
+    break;
+  }
+  default:
+    llvm_unreachable("The current token should be a mbb reference");
+  }
+  lex();
+  return false;
+}
+
 bool MIParser::parseMachineOperand(MachineOperand &Dest) {
   switch (Token.kind()) {
   case MIToken::underscore:
@@ -219,6 +272,9 @@
     return parseRegisterOperand(Dest);
   case MIToken::IntegerLiteral:
     return parseImmediateOperand(Dest);
+  case MIToken::MachineBasicBlock:
+  case MIToken::NamedMachineBasicBlock:
+    return parseMBBOperand(Dest);
   case MIToken::Error:
     return true;
   default:
@@ -271,7 +327,9 @@
   return false;
 }
 
-MachineInstr *llvm::parseMachineInstr(SourceMgr &SM, MachineFunction &MF,
-                                      StringRef Src, SMDiagnostic &Error) {
-  return MIParser(SM, MF, Error, Src).parse();
+MachineInstr *
+llvm::parseMachineInstr(SourceMgr &SM, MachineFunction &MF,
+                        const StringMap<MachineBasicBlock *> &NamedMBBs,
+                        StringRef Src, SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, NamedMBBs, Src).parse();
 }
Index: lib/CodeGen/MIRParser/MIRParser.cpp
===================================================================
--- lib/CodeGen/MIRParser/MIRParser.cpp
+++ lib/CodeGen/MIRParser/MIRParser.cpp
@@ -80,8 +80,10 @@
   /// Initialize the machine basic block using it's YAML representation.
   ///
   /// Return true if an error occurred.
-  bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
-                                   const yaml::MachineBasicBlock &YamlMBB);
+  bool
+  initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+                              const yaml::MachineBasicBlock &YamlMBB,
+                              const StringMap<MachineBasicBlock *> &NamedMBBs);
 
 private:
   /// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
@@ -208,6 +210,8 @@
   MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
   MF.setHasInlineAsm(YamlMF.HasInlineAsm);
   const auto &F = *MF.getFunction();
+  // Named blocks, recorded so that instructions can refer to them by name.
+  StringMap<MachineBasicBlock *> NamedMBBs;
   for (const auto &YamlMBB : YamlMF.BasicBlocks) {
     const BasicBlock *BB = nullptr;
     if (!YamlMBB.Name.empty()) {
@@ -218,8 +222,17 @@
                    "' is not defined in the function '" + MF.getName() + "'");
     }
     auto *MBB = MF.CreateMachineBasicBlock(BB);
+    if (!YamlMBB.Name.empty())
+      NamedMBBs.insert(std::make_pair(YamlMBB.Name, MBB));
     MF.insert(MF.end(), MBB);
-    if (initializeMachineBasicBlock(MF, *MBB, YamlMBB))
+  }
+
+  // Initialize the machine basic blocks after creating them all so that the
+  // machine instructions parser can resolve the MBB references.
+  unsigned I = 0;
+  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
+    if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
+                                    NamedMBBs))
       return true;
   }
   return false;
@@ -227,7 +240,8 @@
 
 bool MIRParserImpl::initializeMachineBasicBlock(
     MachineFunction &MF, MachineBasicBlock &MBB,
-    const yaml::MachineBasicBlock &YamlMBB) {
+    const yaml::MachineBasicBlock &YamlMBB,
+    const StringMap<MachineBasicBlock *> &NamedMBBs) {
   MBB.setAlignment(YamlMBB.Alignment);
   if (YamlMBB.AddressTaken)
     MBB.setHasAddressTaken();
@@ -235,7 +249,7 @@
   // Parse the instructions.
   for (const auto &MISource : YamlMBB.Instructions) {
     SMDiagnostic Error;
-    if (auto *MI = parseMachineInstr(SM, MF, MISource, Error)) {
+    if (auto *MI = parseMachineInstr(SM, MF, NamedMBBs, MISource, Error)) {
       MBB.insert(MBB.end(), MI);
       continue;
     }
Index: lib/CodeGen/MIRPrinter.cpp
===================================================================
--- lib/CodeGen/MIRPrinter.cpp
+++ lib/CodeGen/MIRPrinter.cpp
@@ -78,7 +78,16 @@
   YamlMF.Alignment = MF.getAlignment();
   YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
   YamlMF.HasInlineAsm = MF.hasInlineAsm();
+
+  int I = 0;
   for (const auto &MBB : MF) {
+    // TODO: Allow printing of non sequentially numbered MBBs.
+    // This is currently needed as the basic block references get their index
+    // from MBB.getNumber(), thus it should be sequential so that the parser can
+    // map back to the correct MBBs when parsing the output.
+    assert(MBB.getNumber() == I++ &&
+           "Can't print MBBs that aren't sequentially numbered");
+    (void)I; // Only the assert reads I; keeps NDEBUG builds warning-free.
     yaml::MachineBasicBlock YamlMBB;
     convert(YamlMBB, MBB);
     YamlMF.BasicBlocks.push_back(YamlMBB);
@@ -163,6 +172,17 @@
   case MachineOperand::MO_Immediate:
     OS << Op.getImm();
     break;
+  case MachineOperand::MO_MachineBasicBlock:
+    OS << "%bb.";
+    // Print the name of the underlying basic block if any, else the number.
+    if (const auto *BB = Op.getMBB()->getBasicBlock()) {
+      if (BB->hasName()) {
+        OS << BB->getName();
+        break;
+      }
+    }
+    OS << Op.getMBB()->getNumber();
+    break;
   default:
     // TODO: Print the other machine operands.
     llvm_unreachable("Can't print this machine operand at the moment");
Index: test/CodeGen/MIR/X86/large-index-number-error.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/X86/large-index-number-error.mir
@@ -0,0 +1,32 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    %b = icmp sle i32 %a, 10
+    br i1 %b, label %0, label %1
+
+  ;