diff --git a/llvm/test/TableGen/BitOffsetDecoder.td b/llvm/test/TableGen/BitOffsetDecoder.td --- a/llvm/test/TableGen/BitOffsetDecoder.td +++ b/llvm/test/TableGen/BitOffsetDecoder.td @@ -57,8 +57,27 @@ } -// CHECK: tmp = fieldFromInstruction(insn, 8, 7); -// CHECK: tmp = fieldFromInstruction(insn, 8, 8) << 3; -// CHECK: insertBits(tmp, fieldFromInstruction(insn, 8, 4), 7, 4); -// CHECK: insertBits(tmp, fieldFromInstruction(insn, 12, 4), 3, 4); -// CHECK: tmp = fieldFromInstruction(insn, 8, 8) << 4; +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[8+7-1:8] << 0 +// CHECK: // ExtractorID 1 +// CHECK-NEXT: 5, 0, 0, 8, 7, 0, + +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[8+8-1:8] << 3 +// CHECK: // ExtractorID 2 +// CHECK-NEXT: 5, 0, 0, 8, 8, 3, + +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[8+4-1:8] << 7 +// Tmp |= Inst[12+4-1:12] << 3 +// CHECK: // ExtractorID 3 +// CHECK-NEXT: 8, 0, 0, 8, 4, 7, 12, 4, 3, + +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[8+8-1:8] << 4 +// CHECK: // ExtractorID 4 +// CHECK-NEXT: 5, 0, 0, 8, 8, 4, diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td --- a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td +++ b/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td @@ -39,8 +39,20 @@ } -// CHECK: tmp = fieldFromInstruction(insn, 9, 7) << 1; -// CHECK: tmp = 0x1; -// CHECK: insertBits(tmp, fieldFromInstruction(insn, 9, 7), 1, 7); -// CHECK: tmp = 0x100000000; -// CHECK: insertBits(tmp, fieldFromInstruction(insn, 8, 7), 25, 7); +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[9+7-1:9] << 1 +// CHECK: // ExtractorID 1 +// CHECK-NEXT: 5, 0, 0, 9, 7, 1, + +// Equivalent to... +// Tmp = 1 +// Tmp |= Inst[9+7-1:9] << 1 +// CHECK: // ExtractorID 2 +// CHECK-NEXT: 5, 0, 1, 9, 7, 1, + +// Equivalent to... 
+// Tmp = 0x100000000 +// Tmp |= Inst[8+7-1:8] << 25 +// CHECK: // ExtractorID 3 +// CHECK-NEXT: 5, 1, 0, 8, 7, 25, diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td --- a/llvm/test/TableGen/VarLenDecoder.td +++ b/llvm/test/TableGen/VarLenDecoder.td @@ -47,9 +47,9 @@ // CHECK: MCD::OPC_ExtractField, 3, 5, // Inst{7-3} ... // CHECK-NEXT: MCD::OPC_FilterValue, 8, 4, 0, 0, // Skip to: 12 -// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE:]], 1, 0, // Opcode: FOO16 +// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE:]], 1, 1, // Opcode: FOO16 // CHECK-NEXT: MCD::OPC_FilterValue, 9, 4, 0, 0, // Skip to: 21 -// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE+1]], 1, 1, // Opcode: FOO32 +// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE+1]], 1, 2, // Opcode: FOO32 // CHECK-NEXT: MCD::OPC_Fail, // Instruction length table @@ -57,24 +57,51 @@ // CHECK-NEXT: 43, // CHECK-NEXT: }; -// CHECK: case 0: -// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3); -// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } -// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3); -// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } -// CHECK-NEXT: tmp = fieldFromInstruction(insn, 11, 16); -// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); -// CHECK-NEXT: return S; -// CHECK-NEXT: case 1: -// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3); -// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } -// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3); -// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } -// CHECK-NEXT: tmp = 0x0; -// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 11, 16), 16, 16); -// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 27, 16), 
0, 16); -// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); -// CHECK-NEXT: return S; +// The sequence of decoder methods used. +// This identical sequence of decoder methods appears +// in both instructions. +// CHECK: // DecoderMethodSequenceID 1 +// CHECK-NEXT: 0, // DecodeRegClassRegisterClass +// CHECK-NEXT: 0, // DecodeRegClassRegisterClass +// CHECK-NEXT: 1, // DefaultDecodeImm + +// The sequence of bit extractors used. +// Notice how both instructions start with the same bit extractions +// to decode the registers, but the bit extractor for the immediate operand +// afterwards is different. +// CHECK: // DecoderExtractorSequenceID 1 +// CHECK-NEXT: 1, 2, 3, 0, +// CHECK-NEXT: // DecoderExtractorSequenceID 2 +// CHECK-NEXT: 1, 2, 4, 0, + +// The description of the bit extraction operations used. +// Extractors 1 and 2 are bit extractions for the registers. +// Extractors 3 and 4 describe the two ways immediate operands are decoded. +// --- +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[8+3-1:8] << 0 +// CHECK: // ExtractorID 1 +// CHECK-NEXT: 5, 0, 0, 8, 3, 0, +// --- +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[0+3-1:0] << 0 +// CHECK-NEXT: // ExtractorID 2 +// CHECK-NEXT: 5, 0, 0, 0, 3, 0, +// --- +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[11+16-1:11] << 0 +// CHECK-NEXT: // ExtractorID 3 +// CHECK-NEXT: 5, 0, 0, 11, 16, 0, +// --- +// Equivalent to... +// Tmp = 0 +// Tmp |= Inst[11+16-1:11] << 16 +// Tmp |= Inst[27+16-1:27] << 0 +// CHECK-NEXT: // ExtractorID 4 +// CHECK-NEXT: 8, 0, 0, 11, 16, 16, 27, 16, 0, // CHECK-LABEL: case MCD::OPC_ExtractField: { // CHECK: makeUp(insn, Start + Len); diff --git a/llvm/test/TableGen/trydecode-emission.td b/llvm/test/TableGen/trydecode-emission.td --- a/llvm/test/TableGen/trydecode-emission.td +++ b/llvm/test/TableGen/trydecode-emission.td @@ -36,8 +36,9 @@ // CHECK: /* 0 */ MCD::OPC_ExtractField, 4, 4, // Inst{7-4} ... 
// CHECK-NEXT: /* 3 */ MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26 // CHECK-NEXT: /* 8 */ MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22 -// CHECK-NEXT: /* 15 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 0, 0, 0, 0, // Opcode: InstB, skip to: 22 -// CHECK-NEXT: /* 22 */ MCD::OPC_Decode, {{[0-9]+}}, 1, 1, // Opcode: InstA +// CHECK-NEXT: /* 15 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstB, skip to: 22 +// CHECK-NEXT: /* 22 */ MCD::OPC_Decode, {{[0-9]+}}, 1, 2, // Opcode: InstA // CHECK-NEXT: /* 26 */ MCD::OPC_Fail, -// CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; } +// CHECK: // DecoderMethodSequenceID 1 +// CHECK-NEXT: 0, // DecodeInstB diff --git a/llvm/test/TableGen/trydecode-emission2.td b/llvm/test/TableGen/trydecode-emission2.td --- a/llvm/test/TableGen/trydecode-emission2.td +++ b/llvm/test/TableGen/trydecode-emission2.td @@ -35,10 +35,12 @@ // CHECK-NEXT: /* 8 */ MCD::OPC_ExtractField, 5, 3, // Inst{7-5} ... 
// CHECK-NEXT: /* 11 */ MCD::OPC_FilterValue, 0, 28, 0, 0, // Skip to: 44 // CHECK-NEXT: /* 16 */ MCD::OPC_CheckField, 0, 2, 3, 7, 0, 0, // Skip to: 30 -// CHECK-NEXT: /* 23 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 0, 0, 0, 0, // Opcode: InstB, skip to: 30 +// CHECK-NEXT: /* 23 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstB, skip to: 30 // CHECK-NEXT: /* 30 */ MCD::OPC_CheckField, 3, 2, 0, 7, 0, 0, // Skip to: 44 -// CHECK-NEXT: /* 37 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstA, skip to: 44 +// CHECK-NEXT: /* 37 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 2, 0, 0, 0, // Opcode: InstA, skip to: 44 // CHECK-NEXT: /* 44 */ MCD::OPC_Fail, -// CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; } -// CHECK: if (DecodeInstA(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; } +// CHECK: // DecoderMethodSequenceID 1 +// CHECK-NEXT: 0, // DecodeInstB +// CHECK-NEXT: // DecoderMethodSequenceID 2 +// CHECK-NEXT: 1, // DecodeInstA diff --git a/llvm/test/TableGen/trydecode-emission3.td b/llvm/test/TableGen/trydecode-emission3.td --- a/llvm/test/TableGen/trydecode-emission3.td +++ b/llvm/test/TableGen/trydecode-emission3.td @@ -37,8 +37,9 @@ // CHECK: /* 0 */ MCD::OPC_ExtractField, 4, 4, // Inst{7-4} ... 
// CHECK-NEXT: /* 3 */ MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26 // CHECK-NEXT: /* 8 */ MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22 -// CHECK-NEXT: /* 15 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 0, 0, 0, 0, // Opcode: InstB, skip to: 22 -// CHECK-NEXT: /* 22 */ MCD::OPC_Decode, {{[0-9]+}}, 1, 1, // Opcode: InstA +// CHECK-NEXT: /* 15 */ MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstB, skip to: 22 +// CHECK-NEXT: /* 22 */ MCD::OPC_Decode, {{[0-9]+}}, 1, 2, // Opcode: InstA // CHECK-NEXT: /* 26 */ MCD::OPC_Fail, -// CHECK: if (DecodeInstBOp(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; } +// CHECK: // DecoderMethodSequenceID 1 +// CHECK-NEXT: 0, // DecodeInstBOp diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -18,12 +18,14 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/UniqueVector.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" @@ -33,6 +35,9 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" + +#include "Types.h" + #include #include #include @@ -83,17 +88,86 @@ const_iterator end() const { return Fields.end(); } }; +// After determining the opcode of an instruction, the bits of the instruction +// are read and used to build the operands of an MCInst. This conversion is done +// by a data-driven state machine. +// This state machine works by executing a sequence of commands ("codelets"). 
+// These commands generally extract bits from the instruction, then pass those +// bits to DecoderMethods. These DecoderMethods convert the bits into +// MCOperands. +enum class DecoderCodeletID : unsigned { + DispatchExtractor, + DispatchDecoderMethod, + DispatchCompleteDecoderMethod, + CopyInsnToTmp, + Terminator, +}; + +raw_ostream &operator<<(raw_ostream &OS, DecoderCodeletID ID) { + OS << (unsigned)ID; + return OS; +} + +typedef SmallVector DecoderCodeletIDVector; +typedef SmallVector DecoderMethodIDVector; +typedef SmallVector DecoderExtractorIDVector; +typedef UniqueVector DecoderCodeletSequenceSet; +typedef UniqueVector DecoderMethodSequenceSet; +typedef UniqueVector DecoderExtractorSequenceSet; +struct PerDecoderInfo { + // The ID of the codelet sequence used to implement this decoder. + // The decoding state machine will execute each codelet in this sequence + // one-by-one in order. + unsigned DecoderCodeletSequenceID; + // The ID of the sequence of decoder method calls that this decoder will make. + // When the decoding state machine executes a DispatchDecoderMethod codelet, + // it pops a DecoderMethod ID from this sequence, then calls that method. + unsigned DecoderMethodSequenceID; + // The ID of the sequence of bit extraction commands that this decoder will + // make. When the decoding state machine executes a DispatchExtractor + // codelet, it pops an Extractor ID from this sequence, then executes the + // bit extraction operations specified by that popped extractor. 
+ unsigned DecoderExtractorSequenceID; + + // For UniqueVector + bool operator==(const PerDecoderInfo &RHS) const { + return std::make_tuple(DecoderCodeletSequenceID, DecoderMethodSequenceID, + DecoderExtractorSequenceID) == + std::make_tuple(RHS.DecoderCodeletSequenceID, + RHS.DecoderMethodSequenceID, + RHS.DecoderExtractorSequenceID); + } + bool operator<(const PerDecoderInfo &RHS) const { + return std::make_tuple(DecoderCodeletSequenceID, DecoderMethodSequenceID, + DecoderExtractorSequenceID) < + std::make_tuple(RHS.DecoderCodeletSequenceID, + RHS.DecoderMethodSequenceID, + RHS.DecoderExtractorSequenceID); + } +}; + typedef std::vector DecoderTable; typedef uint32_t DecoderFixup; typedef std::vector FixupList; typedef std::vector FixupScopeList; typedef SmallSetVector PredicateSet; -typedef SmallSetVector DecoderSet; +typedef UniqueVector DecoderMap; +typedef SmallSetVector DecoderMethodSet; +typedef UniqueVector> DecoderExtractorSet; struct DecoderTableInfo { DecoderTable Table; FixupScopeList FixupStack; PredicateSet Predicates; - DecoderSet Decoders; + // For compression purposes, the operations of the decoder state machine are + // de-duplicated as much as possible. Set data structures are used to keep + // track of unique operations, and IDs are used by the state machine to refer + // to items in these sets. 
+ DecoderMap Decoders; + DecoderMethodSet DecoderMethods; + DecoderExtractorSet DecoderExtractors; + DecoderCodeletSequenceSet DecoderCodeletSequences; + DecoderMethodSequenceSet DecoderMethodSequences; + DecoderExtractorSequenceSet DecoderExtractorSequences; }; struct EncodingAndInst { @@ -148,9 +222,13 @@ void emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates, unsigned Indentation) const; - void emitDecoderFunction(formatted_raw_ostream &OS, - DecoderSet &Decoders, - unsigned Indentation) const; + void emitDecoderFunction( + formatted_raw_ostream &OS, const DecoderMap &Decoders, + const DecoderMethodSet &DecoderMethods, + const DecoderExtractorSet &DecoderExtractors, + const DecoderCodeletSequenceSet &DecoderCodeletSequences, + const DecoderMethodSequenceSet &DecoderMethodSequences, + const DecoderExtractorSequenceSet &DecoderExtractorSequences) const; // run - Output the code emitter void run(raw_ostream &o); @@ -526,14 +604,24 @@ void emitSingletonTableEntry(DecoderTableInfo &TableInfo, const Filter &Best) const; - void emitBinaryParser(raw_ostream &o, unsigned &Indentation, + void emitBinaryParser(SmallVector &CodeletIDs, + SmallVector, 16> &DecoderMethods, + SmallVector, 16> &Extractors, const OperandInfo &OpInfo, bool &OpHasCompleteDecoder) const; - void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, - bool &HasCompleteDecoder) const; - unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, - bool &HasCompleteDecoder) const; + void emitDecoder(SmallVector &CodeletIDs, + SmallVector, 16> &DecoderMethods, + SmallVector, 16> &Extractors, + unsigned Opc, bool &HasCompleteDecoder) const; + + unsigned + getDecoderIndex(DecoderMap &Decoders, DecoderMethodSet &UniqueDecoderMethods, + DecoderExtractorSet &UniqueExtractors, + DecoderCodeletSequenceSet &UniqueDecoderCodeletSequences, + DecoderMethodSequenceSet &UniqueDecoderMethodSequences, + DecoderExtractorSequenceSet &UniqueDecoderExtractorSequences, + unsigned Opc, bool 
&HasCompleteDecoder) const; // Assign a single filter and run with it. void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); @@ -991,37 +1079,416 @@ OS.indent(Indentation) << "}\n\n"; } -void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, - DecoderSet &Decoders, - unsigned Indentation) const { - // The decoder function is just a big switch statement based on the - // input decoder index. - OS.indent(Indentation) << "template \n"; - OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," - << " unsigned Idx, InsnType insn, MCInst &MI,\n"; - OS.indent(Indentation) - << " uint64_t " - << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; - Indentation += 2; - OS.indent(Indentation) << "DecodeComplete = true;\n"; +static void emitDecoderMethodComment(raw_ostream &OS, StringRef Str) { + StringRef NewStr = Str.take_until([](char C) { return C == '\n'; }); + OS << " // " << NewStr << (NewStr.size() == Str.size() ? "" : "..."); +} + +void DecoderEmitter::emitDecoderFunction( + formatted_raw_ostream &OS, const DecoderMap &Decoders, + const DecoderMethodSet &DecoderMethods, + const DecoderExtractorSet &DecoderExtractors, + const DecoderCodeletSequenceSet &DecoderCodeletSequences, + const DecoderMethodSequenceSet &DecoderMethodSequences, + const DecoderExtractorSequenceSet &DecoderExtractorSequences) const { + OS << "// Utility function for decoding plain immediates. 
Used as the default DecoderMethod.\n"; + OS << "template\n"; + OS << "static DecodeStatus DefaultDecodeImm(MCInst &MI, InsnType insn, uint64_t Address, const MCDisassembler *Decoder) {\n"; + OS << " MI.addOperand(MCOperand::createImm(insn));\n"; + OS << " return DecodeStatus::Success;\n"; + OS << "}\n\n"; + // Output some useful mappings + OS << "// The ID of the sequence of commands (\"codelets\") to execute to implement each decoder.\n"; + OS << "// Execution of the decoder consists of executing the codelets in the sequence in order.\n"; + OS << "// This indirection table allows two decoders to share the same sequence of codelets, which allows removing redundancies.\n"; + OS << "static const unsigned DecoderIDToDecoderCodeletSequenceID[] = {\n"; + // DecoderID 0 is the null decoder, so it returns a null sequence ID. + OS << " 0,"; + for (size_t DecoderIdx = 0; DecoderIdx < Decoders.size(); DecoderIdx++) { + if (DecoderIdx % 10 == 0) { + OS << "\n "; + } + OS << " " << Decoders[DecoderIdx + 1].DecoderCodeletSequenceID << ","; + } + OS << "\n};\n\n"; + OS << "// The ID of the list of decoder methods called by each decoder.\n"; + OS << "// This indirection table allows two decoders to share the same sequence of decoder methods, which allows removing redundancies.\n"; + OS << "static const unsigned DecoderIDToDecoderMethodSequenceID[] = {\n"; + // DecoderID 0 is the null decoder, so it returns a null sequence ID. 
+ OS << " 0,"; + for (size_t DecoderIdx = 0; DecoderIdx < Decoders.size(); DecoderIdx++) { + if (DecoderIdx % 10 == 0) { + OS << "\n "; + } + OS << " " << Decoders[DecoderIdx + 1].DecoderMethodSequenceID << ","; + } + OS << "\n};\n\n"; + OS << "// The ID of the list of bit extraction operations done by each decoder.\n"; + OS << "// This indirection table allows two decoders to share the same sequence of extractions, which allows removing redundancies.\n"; + OS << "static const unsigned DecoderIDToDecoderExtractorSequenceID[] = {\n"; + // DecoderID 0 is the null decoder, so it returns a null sequence ID. + OS << " 0,"; + for (size_t DecoderIdx = 0; DecoderIdx < Decoders.size(); DecoderIdx++) { + if (DecoderIdx % 10 == 0) { + OS << "\n "; + } + OS << " " << Decoders[DecoderIdx + 1].DecoderExtractorSequenceID << ","; + } + OS << "\n};\n\n"; + + // Lookup table for the IDs of decoder methods associated to a sequence ID. + OS << "// The ID of the DecoderMethods to call for each decoder method sequence.\n"; + OS << "// Every time a decoder's execution sees a DispatchDecoderMethod codelet,\n"; + OS << "// it executes the next DecoderMethod in its sequence of decoder methods.\n"; + OS << "static const unsigned Flat_DecoderMethodSequenceIDToDecoderMethodIDs[] = " + "{\n"; + std::vector DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex; + // Sequence IDs start at 1, so write a null value for ID 0. + DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.push_back(0); + OS << " ~0U,\n"; + // Push the start for the first iteration. + DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.push_back(1); + // Now write the non-null sequence IDs' method IDs. 
+ for (size_t DecoderMethodSequenceID = 1; + DecoderMethodSequenceID <= DecoderMethodSequences.size(); + DecoderMethodSequenceID++) { + OS << " // DecoderMethodSequenceID " << DecoderMethodSequenceID << "\n"; + const auto &DecoderMethodIDs = + DecoderMethodSequences[DecoderMethodSequenceID]; + for (size_t DecoderMethodIdx = 0; + DecoderMethodIdx < DecoderMethodIDs.size(); DecoderMethodIdx++) { + OS << " " << DecoderMethodIDs[DecoderMethodIdx] << ","; + emitDecoderMethodComment( + OS, DecoderMethods[DecoderMethodIDs[DecoderMethodIdx]]); + OS << "\n"; + } + // Push the start for the next iteration. + DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.push_back( + DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.back() + + DecoderMethodIDs.size()); + } + // Pop the start for the past-the-end iteration that never really happened. + DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.pop_back(); + OS << "};\n"; + OS << "\n"; + OS << "// Gets the index for the data of a given DecoderMethodSequenceID in the Flat_DecoderMethodSequenceIDToDecoderMethodIDs array.\n"; + OS << "static const unsigned " + "DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex[] = {"; + for (size_t DecoderMethodSequenceID = 0; + DecoderMethodSequenceID < DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.size(); + DecoderMethodSequenceID++) { + if (DecoderMethodSequenceID % 10 == 0) { + OS << "\n "; + } + OS << " " + << DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex + [DecoderMethodSequenceID] + << ","; + } + OS << "\n};\n"; + OS << "\n"; + + // This dispatcher allows calling decoder methods in a generic way, which + // allows the decoder to be more compressed and more table-driven. 
+ OS << "// Executes the next decoder method in the decoder method sequence, and increments the sequence's iterator to the following entry.\n"; + OS << "template \n" + "static DecodeStatus DispatchDecoderMethod(const unsigned " + "*&NextDecoderMethod, MCInst &MI, const InsnType &tmp, uint64_t " + "Address, const MCDisassembler *Decoder) {\n" + " const unsigned CurrDecoderMethod = *NextDecoderMethod++;\n" + " switch (CurrDecoderMethod) {\n" + " default:\n" + " llvm_unreachable(\"Unknown DecoderMethod ID.\");\n"; + for (size_t DecoderMethodIdx = 0; DecoderMethodIdx < DecoderMethods.size(); + DecoderMethodIdx++) { + OS << " case " << DecoderMethodIdx << ":\n" + << " LLVM_DEBUG(dbgs() << \"Calling DecoderMethod " + << DecoderMethodIdx << " \\\"\" << " + << "R\"(" << DecoderMethods[DecoderMethodIdx].val() << ")\"" + << " << \"\\\" with input \" << tmp << \"\\n\");\n" + << " return " << DecoderMethods[DecoderMethodIdx].val() + << "(MI, tmp, Address, Decoder);\n"; + } + OS << " }\n" + "}\n\n"; + + // Lookup table for the IDs of extractors associated to a sequence ID. + // +1 in size to guarantee a sentinel value (to catch bugs). + OS << "// The ID of the extractors to call for each extractor method sequence.\n"; + OS << "// Every time a decoder's execution sees a DispatchExtractor codelet,\n"; + OS << "// it executes the next Extractor in its sequence of extractors.\n"; + StringRef ExtractorIDType = getMinimalTypeForRange(DecoderExtractors.size()); + OS << "using ExtractorIDType = " << ExtractorIDType << ";\n"; + OS << "static const " << ExtractorIDType + << " Flat_DecoderExtractorSequenceIDToExtractorIDs[] = {\n"; + std::vector DecoderExtractorSequenceIDToFlatExtractorIDsIndex; + // Sequence IDs start at 1, so write a null value for ID 0. + DecoderExtractorSequenceIDToFlatExtractorIDsIndex.push_back(0); + OS << " 0,\n"; + // Push the start for the first iteration. 
+ DecoderExtractorSequenceIDToFlatExtractorIDsIndex.push_back(1); + // Now write the non-null sequence IDs' extractor IDs. + for (size_t DecoderExtractorSequenceID = 1; + DecoderExtractorSequenceID <= DecoderExtractorSequences.size(); + DecoderExtractorSequenceID++) { + OS << " // DecoderExtractorSequenceID " << DecoderExtractorSequenceID << "\n"; + OS << " "; + const auto &ExtractorIDs = + DecoderExtractorSequences[DecoderExtractorSequenceID]; + for (size_t ExtractorIdx = 0; ExtractorIdx < ExtractorIDs.size(); + ExtractorIdx++) { + OS << ExtractorIDs[ExtractorIdx]; + OS << ", "; + } + // Push the start for the next iteration. + DecoderExtractorSequenceIDToFlatExtractorIDsIndex.push_back( + DecoderExtractorSequenceIDToFlatExtractorIDsIndex.back() + + ExtractorIDs.size()); + OS << "0,\n"; + // Account for final 0. + DecoderExtractorSequenceIDToFlatExtractorIDsIndex.back() += 1; + } + // Pop the start for the past-the-end iteration that never really happened. + DecoderExtractorSequenceIDToFlatExtractorIDsIndex.pop_back(); + OS << "};\n"; + OS << "\n"; + OS << "// Gets the index for the data of a given DecoderExtractorSequenceID in the Flat_DecoderExtractorSequenceIDToExtractorIDs array.\n"; + OS << "static const unsigned " + "DecoderExtractorSequenceIDToFlatExtractorIDsIndex[] = {"; + for (size_t DecoderExtractorSequenceID = 0; + DecoderExtractorSequenceID < DecoderExtractorSequenceIDToFlatExtractorIDsIndex.size(); + DecoderExtractorSequenceID++) { + if (DecoderExtractorSequenceID % 10 == 0) { + OS << "\n "; + } + OS << " " + << DecoderExtractorSequenceIDToFlatExtractorIDsIndex + [DecoderExtractorSequenceID] + << ","; + } + OS << "\n};\n"; + OS << "\n"; + + OS << "// The specification of the bit extraction commands for each bit extractor.\n"; + OS << "// Specifies eg. 
the operands for the shifts and masks that extract bits.\n"; + OS << "static const uint8_t Flat_ExtractorIDToExtractor[] = {\n"; + std::vector ExtractorIDToFlatExtractorIndex; + // Extractor IDs start at 1, so write a null value for ID 0. + ExtractorIDToFlatExtractorIndex.push_back(0); + OS << " 0,\n"; + ExtractorIDToFlatExtractorIndex.push_back(1); + for (size_t ExtractorIdx = 0; ExtractorIdx < DecoderExtractors.size(); + ExtractorIdx++) { + unsigned ExtractorID = unsigned(ExtractorIdx) + 1; + OS << " // ExtractorID " << ExtractorID << "\n"; + const auto &Extractor = DecoderExtractors[ExtractorID]; + unsigned BytesOutputtedForExtractor = 0; + assert(isUInt<8>(Extractor.size()) && "Too large"); + OS << " " << Extractor.size() << ","; + BytesOutputtedForExtractor++; + for (const uint32_t ExtractorCmd : Extractor) { + SmallString<16> CmdBytes; + raw_svector_ostream S(CmdBytes); + encodeULEB128(ExtractorCmd, S); + for (const uint8_t Byte : S.str()) { + OS << " " << (unsigned)Byte << ","; + BytesOutputtedForExtractor++; + } + } + // Push the start for the next iteration. + ExtractorIDToFlatExtractorIndex.push_back( + ExtractorIDToFlatExtractorIndex.back() + BytesOutputtedForExtractor); + OS << "\n"; + } + // Pop the start for the past-the-end iteration that never really happened. + ExtractorIDToFlatExtractorIndex.pop_back(); + OS << "};\n"; + OS << "\n"; + OS << "// Gets the index for the data of a given Extractor in the Flat_ExtractorIDToExtractor array.\n"; + OS << "static const unsigned " + "ExtractorIDToFlatExtractorIndex[] = {"; + for (size_t ExtractorID = 0; + ExtractorID < ExtractorIDToFlatExtractorIndex.size(); ExtractorID++) { + if (ExtractorID % 10 == 0) { + OS << "\n "; + } + OS << " " << ExtractorIDToFlatExtractorIndex[ExtractorID] << ","; + } + OS << "\n};\n"; + OS << "\n"; + + // Lookup table for the IDs of codelets associated to a sequence ID. + // +1 in size to guarantee a sentinel value (to catch bugs). 
+ OS << "// The list of codelets to execute for each codelet sequence.\n"; + OS << "// Executing a decoder consists of running all codelets in its sequence.\n"; + StringRef CodeletIDType = + getMinimalTypeForRange((unsigned)DecoderCodeletID::Terminator); + OS << "using CodeletIDType = " << CodeletIDType << ";\n"; + OS << "static const CodeletIDType Flat_DecoderCodeletSequenceIDToCodeletIDs[] = " + "{\n"; + std::vector DecoderCodeletSequenceIDToFlatCodeletIDsIndex; + // Sequence IDs start at 1, so write a null value for ID 0. + DecoderCodeletSequenceIDToFlatCodeletIDsIndex.push_back(0); + OS << " " << DecoderCodeletID::Terminator << ",\n"; + // Push the start for the first iteration. + DecoderCodeletSequenceIDToFlatCodeletIDsIndex.push_back(1); + // Now write the non-null sequence IDs' codelet IDs. + for (size_t DecoderCodeletSequenceID = 1; + DecoderCodeletSequenceID <= DecoderCodeletSequences.size(); + DecoderCodeletSequenceID++) { + OS << " // CodeletSequenceID " << DecoderCodeletSequenceID << "\n"; + OS << " "; + const auto &CodeletIDs = DecoderCodeletSequences[DecoderCodeletSequenceID]; + for (size_t CodeletIdx = 0; CodeletIdx < CodeletIDs.size(); CodeletIdx++) { + OS << CodeletIDs[CodeletIdx]; + OS << ", "; + } + // Push the start for the next iteration. + DecoderCodeletSequenceIDToFlatCodeletIDsIndex.push_back( + DecoderCodeletSequenceIDToFlatCodeletIDsIndex.back() + + CodeletIDs.size()); + OS << DecoderCodeletID::Terminator << ",\n"; + // Account for final terminator. + DecoderCodeletSequenceIDToFlatCodeletIDsIndex.back() += 1; + } + // Pop the start for the past-the-end iteration that never really happened. 
+ DecoderCodeletSequenceIDToFlatCodeletIDsIndex.pop_back(); + OS << "};\n"; + OS << "\n"; + OS << "// Gets the index for the data of a given DecoderCodeletSequenceID in the Flat_DecoderCodeletSequenceIDToCodeletIDs array.\n"; + OS << "static const unsigned " + "DecoderCodeletSequenceIDToFlatCodeletIDsIndex[] = {"; + for (size_t DecoderCodeletSequenceID = 0; + DecoderCodeletSequenceID < + DecoderCodeletSequenceIDToFlatCodeletIDsIndex.size(); + DecoderCodeletSequenceID++) { + if (DecoderCodeletSequenceID % 10 == 0) { + OS << "\n "; + } + OS << " " + << DecoderCodeletSequenceIDToFlatCodeletIDsIndex + [DecoderCodeletSequenceID] + << ","; + } + OS << "\n};\n"; + OS << "\n"; + + // This dispatcher allows doing bit extraction operations in a generic way, + // which allows the decoder to be more compressed and more table-driven. + OS << "// Executes the next bit extractor in the extractor sequence, and increments the sequence's iterator to the following entry.\n"; + OS << "template \n" + "static void DispatchExtractor(const ExtractorIDType " + "*&NextExtractor, const InsnType &insn, InsnType &tmp) {\n" + " const ExtractorIDType ExtractorID = *NextExtractor++;\n" + " LLVM_DEBUG(dbgs() << \"Executing Extractor \" << unsigned(ExtractorID) << \":\\n\");\n" + " const uint8_t *Extractor = &Flat_ExtractorIDToExtractor[ExtractorIDToFlatExtractorIndex[ExtractorID]];\n" + " // Number of tokens that make up this extractor, minus the Len.\n" + " uint32_t ExtractorLen = Extractor[0];\n" + " assert(ExtractorLen != 0 && \"Ran an empty extractor?\");\n" + " const uint8_t *ExtractorStart = &Extractor[1];\n" + " // Initial value.\n" + " unsigned WordLen0, WordLen1;\n" + " uint32_t Word0 = decodeULEB128(ExtractorStart, &WordLen0);\n" + " uint32_t Word1 = decodeULEB128(ExtractorStart + WordLen0, &WordLen1);\n" + " tmp = Make_64(Word0, Word1);\n" + " LLVM_DEBUG(dbgs() << \" Extractor: tmp = \" << tmp << \"\\n\");\n" + " ExtractorLen -= 2;\n" + " // Now execute every extraction command one-by-one.\n" + " 
for (const uint8_t *CurrExtractorCmd = &ExtractorStart[WordLen0 + WordLen1];\n" + " ExtractorLen > 0;\n" + " ExtractorLen -= 3) {\n" + " unsigned CmdLen;\n" + " const uint32_t Base = decodeULEB128(CurrExtractorCmd, &CmdLen);\n" + " CurrExtractorCmd += CmdLen;\n" + " const uint32_t Width = decodeULEB128(CurrExtractorCmd, &CmdLen);\n" + " CurrExtractorCmd += CmdLen;\n" + " const uint32_t Offset = decodeULEB128(CurrExtractorCmd, &CmdLen);\n" + " CurrExtractorCmd += CmdLen;\n" + " InsnType ExtractedBits = fieldFromInstruction(insn, Base, Width) << Offset;\n" + " LLVM_DEBUG(dbgs() << \" Extractor: tmp |= \" << ExtractedBits\n" + " << \" // fieldFromInstruction(\"\n" + " << \"insn=\" << insn \n" + " << \", Base=\" << Base << \", Width=\" << Width << \")\"\n" + " << \" << Offset=\" << Offset << \"\\n\");\n" + " tmp |= ExtractedBits;\n" + " }\n" + "}\n\n"; + + OS << "template \n"; + OS << "static DecodeStatus decodeToMCInst(DecodeStatus S," + << " unsigned Idx, const InsnType &insn, MCInst &MI,\n"; + OS << " uint64_t " + << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; + OS << " DecodeComplete = true;\n"; // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits // It would be better for emitBinaryParser to use a 64-bit tmp whenever // possible but fall back to an InsnType-sized tmp for truly large fields. 
- OS.indent(Indentation) << "using TmpType = " - "std::conditional_t::" - "value, InsnType, uint64_t>;\n"; - OS.indent(Indentation) << "TmpType tmp;\n"; - OS.indent(Indentation) << "switch (Idx) {\n"; - OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; - unsigned Index = 0; - for (const auto &Decoder : Decoders) { - OS.indent(Indentation) << "case " << Index++ << ":\n"; - OS << Decoder; - OS.indent(Indentation+2) << "return S;\n"; - } - OS.indent(Indentation) << "}\n"; - Indentation -= 2; - OS.indent(Indentation) << "}\n\n"; + OS << " using TmpType = " + "std::conditional_t::" + "value, InsnType, uint64_t>;\n"; + OS << " // Grab all the pieces of the state machine for this decoder.\n"; + OS << " const unsigned DecoderCodeletSequenceID = " + "DecoderIDToDecoderCodeletSequenceID[Idx];\n"; + OS << " const unsigned DecoderMethodSequenceID = " + "DecoderIDToDecoderMethodSequenceID[Idx];\n"; + OS << " const unsigned DecoderExtractorSequenceID = " + "DecoderIDToDecoderExtractorSequenceID[Idx];\n"; + OS << " const CodeletIDType *NextCodelet = " + "&Flat_DecoderCodeletSequenceIDToCodeletIDs[" + "DecoderCodeletSequenceIDToFlatCodeletIDsIndex[" + "DecoderCodeletSequenceID]];\n"; + OS << " const unsigned *NextDecoderMethod = " + "&Flat_DecoderMethodSequenceIDToDecoderMethodIDs[" + "DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex[" + "DecoderMethodSequenceID]];\n"; + OS << " const ExtractorIDType *NextExtractor = " + "&Flat_DecoderExtractorSequenceIDToExtractorIDs[" + "DecoderExtractorSequenceIDToFlatExtractorIDsIndex[" + "DecoderExtractorSequenceID]];\n"; + OS << " DecodeComplete = true;\n"; + OS << " // \"tmp\" is the single register used by the decoder state " + "machine.\n"; + OS << " // It's used as storage to pass data between different codelets " + "being executed.\n"; + OS << " TmpType tmp;\n"; + OS << " while (*NextCodelet != " << DecoderCodeletID::Terminator << ") {\n"; + OS << " const unsigned CodeletID = *NextCodelet;\n"; + OS << " 
switch (CodeletID) {\n"; + OS << " default:\n"; + OS << " llvm_unreachable(\"Invalid Codelet ID\");\n"; + OS << " case " << DecoderCodeletID::DispatchExtractor << ":\n"; + OS << " // DispatchExtractor: Calls a generic function that extracts " + "bits from the encoding and stores them in \"tmp\"\n"; + OS << " DispatchExtractor(NextExtractor, insn, tmp);\n"; + OS << " break;\n"; + OS << " case " << DecoderCodeletID::DispatchDecoderMethod << ":\n"; + OS << " // DispatchDecoderMethod: Calls a generic dispatch function " + "that calls a method to converts the extracted bits into MCInst " + "operands.\n"; + OS << " " << GuardPrefix + << "DispatchDecoderMethod(NextDecoderMethod, MI, tmp, Address, Decoder)" + << GuardPostfix << " {\n"; + OS << " DecodeComplete = false;\n"; + OS << " return MCDisassembler::Fail;\n"; + OS << " }\n"; + OS << " break;\n"; + OS << " case " << DecoderCodeletID::DispatchCompleteDecoderMethod << ":\n"; + OS << " // DispatchCompleteDecoderMethod: Calls a decoder method while requiring it to be complete (and therefore can't fail.)\n"; + OS << " " << GuardPrefix + << "DispatchDecoderMethod(NextDecoderMethod, MI, tmp, Address, Decoder)" + << GuardPostfix << "\n"; + OS << " return MCDisassembler::Fail;\n"; + OS << " break;\n"; + OS << " case " << DecoderCodeletID::CopyInsnToTmp << ":\n"; + OS << " // CopyInsnToTmp: Copies all of \"insn\" into the \"tmp\" variable.\n"; + OS << " // Mainly used for DecoderMethods that are run on the whole instruction.\n"; + OS << " tmp = insn;\n"; + OS << " break;\n"; + OS << " }\n"; + OS << " NextCodelet++;\n"; + OS << " }\n"; + OS << " return S;\n"; + OS << "}\n\n"; } // Populates the field of the insn given the start position and the number of @@ -1139,91 +1606,124 @@ return Num; } -void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, - const OperandInfo &OpInfo, - bool &OpHasCompleteDecoder) const { - const std::string &Decoder = OpInfo.Decoder; - - bool UseInsertBits = OpInfo.numFields() != 1 
|| OpInfo.InitValue != 0; - - if (UseInsertBits) { - o.indent(Indentation) << "tmp = 0x"; - o.write_hex(OpInfo.InitValue); - o << ";\n"; - } +void FilterChooser::emitBinaryParser( + SmallVector &CodeletIDs, + SmallVector, 16> &DecoderMethods, + SmallVector, 16> &Extractors, + const OperandInfo &OpInfo, bool &OpHasCompleteDecoder) const { + // Build the extractor first then pass its result to the decoder method. + { + SmallVector Extractor; + // Split into low and high to avoid storing all commands as 64-bit ints. + Extractor.push_back(Hi_32(OpInfo.InitValue)); + Extractor.push_back(Lo_32(OpInfo.InitValue)); + + for (const EncodingField &EF : OpInfo) { + Extractor.push_back(EF.Base); + Extractor.push_back(EF.Width); + Extractor.push_back(EF.Offset); + } - for (const EncodingField &EF : OpInfo) { - o.indent(Indentation); - if (UseInsertBits) - o << "insertBits(tmp, "; - else - o << "tmp = "; - o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; - if (UseInsertBits) - o << ", " << EF.Offset << ", " << EF.Width << ')'; - else if (EF.Offset != 0) - o << " << " << EF.Offset; - o << ";\n"; + CodeletIDs.push_back(DecoderCodeletID::DispatchExtractor); + Extractors.push_back(std::move(Extractor)); } - if (Decoder != "") { + const std::string &UserDecoder = OpInfo.Decoder; + StringRef DecoderMethod; + if (UserDecoder != "") { OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; - o.indent(Indentation) << Emitter->GuardPrefix << Decoder - << "(MI, tmp, Address, Decoder)" - << Emitter->GuardPostfix - << " { " << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ") - << "return MCDisassembler::Fail; }\n"; + DecoderMethod = UserDecoder; } else { OpHasCompleteDecoder = true; - o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; + // Wrapped in lambda to conform to the syntax of decoder methods. + DecoderMethod = "DefaultDecodeImm"; } + + CodeletIDs.push_back(OpHasCompleteDecoder + ? 
DecoderCodeletID::DispatchCompleteDecoderMethod + : DecoderCodeletID::DispatchDecoderMethod); + DecoderMethods.push_back(DecoderMethod); } -void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, - unsigned Opc, bool &HasCompleteDecoder) const { +void FilterChooser::emitDecoder( + SmallVector &CodeletIDs, + SmallVector, 16> &DecoderMethods, + SmallVector, 16> &Extractors, unsigned Opc, + bool &HasCompleteDecoder) const { HasCompleteDecoder = true; for (const auto &Op : Operands.find(Opc)->second) { // If a custom instruction decoder was specified, use that. if (Op.numFields() == 0 && !Op.Decoder.empty()) { HasCompleteDecoder = Op.HasCompleteDecoder; - OS.indent(Indentation) << Emitter->GuardPrefix << Op.Decoder - << "(MI, insn, Address, Decoder)" - << Emitter->GuardPostfix - << " { " << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") - << "return MCDisassembler::Fail; }\n"; + CodeletIDs.push_back(DecoderCodeletID::CopyInsnToTmp); + CodeletIDs.push_back(HasCompleteDecoder + ? DecoderCodeletID::DispatchCompleteDecoderMethod + : DecoderCodeletID::DispatchDecoderMethod); + DecoderMethods.emplace_back(Op.Decoder); break; } bool OpHasCompleteDecoder; - emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); + emitBinaryParser(CodeletIDs, DecoderMethods, Extractors, Op, + OpHasCompleteDecoder); if (!OpHasCompleteDecoder) HasCompleteDecoder = false; } } -unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, - unsigned Opc, - bool &HasCompleteDecoder) const { - // Build up the predicate string. - SmallString<256> Decoder; - // FIXME: emitDecoder() function can take a buffer directly rather than - // a stream. - raw_svector_ostream S(Decoder); - unsigned I = 4; - emitDecoder(S, I, Opc, HasCompleteDecoder); - - // Using the full decoder string as the key value here is a bit - // heavyweight, but is effective. 
If the string comparisons become a - // performance concern, we can implement a mangling of the predicate - // data easily enough with a map back to the actual string. That's - // overkill for now, though. +unsigned FilterChooser::getDecoderIndex( + DecoderMap &Decoders, DecoderMethodSet &UniqueDecoderMethods, + DecoderExtractorSet &UniqueExtractors, + DecoderCodeletSequenceSet &UniqueDecoderCodeletSequences, + DecoderMethodSequenceSet &UniqueDecoderMethodSequences, + DecoderExtractorSequenceSet &UniqueDecoderExtractorSequences, unsigned Opc, + bool &HasCompleteDecoder) const { + // Build up the list of codelets and methods that decode this instruction. + DecoderCodeletIDVector DecoderCodeletIDs; + SmallVector, 16> DecoderMethods; + SmallVector, 16> Extractors; + emitDecoder(DecoderCodeletIDs, DecoderMethods, Extractors, Opc, + HasCompleteDecoder); + + // Add new sequence or get a reference to a previously inserted identical one. + const unsigned CodeletSequenceID = + UniqueDecoderCodeletSequences.insert(DecoderCodeletIDs); + + // Convert method strings to the corresponding method ID sequence. + DecoderMethodIDVector DecoderMethodIDs; + for (const auto &DecoderMethod : DecoderMethods) { + // Make sure the method exists in the table. + UniqueDecoderMethods.insert(CachedHashString(DecoderMethod)); + // Get the unique ID of the method from its location in the table. + DecoderMethodSet::const_iterator P = + find(UniqueDecoderMethods, DecoderMethod); + // Append the unique ID of the method to the decoder's methods. + DecoderMethodIDs.push_back((unsigned)(P - UniqueDecoderMethods.begin())); + } - // Make sure the predicate is in the table. - Decoders.insert(CachedHashString(Decoder)); - // Now figure out the index for when we write out the table. - DecoderSet::const_iterator P = find(Decoders, Decoder.str()); - return (unsigned)(P - Decoders.begin()); + // Add new sequence or get a reference to a previously inserted identical one. 
+ const unsigned MethodSequenceID = + UniqueDecoderMethodSequences.insert(DecoderMethodIDs); + + // Convert extractors to the corresponding extractor ID sequence. + DecoderExtractorIDVector ExtractorIDs; + for (const auto &Extractor : Extractors) { + // Make sure the extractor exists in the table. + const unsigned ExtractorID = UniqueExtractors.insert(Extractor); + // Append the unique ID of the extractor to the decoder's extractors. + ExtractorIDs.push_back(ExtractorID); + } + + // Add new extractor or get reference to previously inserted identical one. + const unsigned ExtractorSequenceID = + UniqueDecoderExtractorSequences.insert(ExtractorIDs); + + // Add new decoder or get a reference to previously inserted identical one. + const PerDecoderInfo Decoder{CodeletSequenceID, MethodSequenceID, + ExtractorSequenceID}; + const unsigned DecoderID = Decoders.insert(Decoder); + return DecoderID; } bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, @@ -1442,8 +1942,10 @@ emitSoftFailTableEntry(TableInfo, Opc.EncodingID); bool HasCompleteDecoder; - unsigned DIdx = - getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); + unsigned DIdx = getDecoderIndex( + TableInfo.Decoders, TableInfo.DecoderMethods, TableInfo.DecoderExtractors, + TableInfo.DecoderCodeletSequences, TableInfo.DecoderMethodSequences, + TableInfo.DecoderExtractorSequences, Opc.EncodingID, HasCompleteDecoder); // Produce OPC_Decode or OPC_TryDecode opcode based on the information // whether the instruction decoder is complete or not. If it is complete @@ -2676,7 +3178,10 @@ emitPredicateFunction(OS, TableInfo.Predicates, 0); // Emit the decoder function. 
- emitDecoderFunction(OS, TableInfo.Decoders, 0); + emitDecoderFunction( + OS, TableInfo.Decoders, TableInfo.DecoderMethods, + TableInfo.DecoderExtractors, TableInfo.DecoderCodeletSequences, + TableInfo.DecoderMethodSequences, TableInfo.DecoderExtractorSequences); // Emit the main entry point for the decoder, decodeInstruction(). emitDecodeInstruction(OS, IsVarLenInst);