Skip to content

Commit 8a0453e

Browse files
author
Colin LeMahieu
committed Nov 9, 2015
[AsmParser] Backends can parameterize ASM tokenization.
llvm-svn: 252439
1 parent 87f5e80 commit 8a0453e

File tree

7 files changed

+103
-47
lines changed

7 files changed

+103
-47
lines changed
 

‎llvm/include/llvm/Target/Target.td

+9
Original file line number | Diff line number | Diff line change
@@ -965,6 +965,15 @@ class AsmParserVariant {
965965
// register tokens as constrained registers, instead of tokens, for the
966966
// purposes of matching.
967967
string RegisterPrefix = "";
968+
969+
// TokenizingCharacters - Characters that are standalone tokens
970+
string TokenizingCharacters = "[]*!";
971+
972+
// SeparatorCharacters - Characters that are not tokens
973+
string SeparatorCharacters = " \t,";
974+
975+
// BreakCharacters - Characters that start new identifiers
976+
string BreakCharacters = "";
968977
}
969978
def DefaultAsmParserVariant : AsmParserVariant;
970979

‎llvm/lib/MC/MCParser/AsmParser.cpp

+9
Original file line number | Diff line number | Diff line change
@@ -1334,6 +1334,15 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
13341334
// Treat '.' as a valid identifier in this context.
13351335
Lex();
13361336
IDVal = ".";
1337+
} else if (Lexer.is(AsmToken::LCurly)) {
1338+
// Treat '{' as a valid identifier in this context.
1339+
Lex();
1340+
IDVal = "{";
1341+
1342+
} else if (Lexer.is(AsmToken::RCurly)) {
1343+
// Treat '}' as a valid identifier in this context.
1344+
Lex();
1345+
IDVal = "}";
13371346
} else if (parseIdentifier(IDVal)) {
13381347
if (!TheCondState.Ignore)
13391348
return TokError("unexpected token at start of statement");

‎llvm/lib/Target/AArch64/AArch64.td

+2
Original file line number | Diff line number | Diff line change
@@ -125,11 +125,13 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
125125
def GenericAsmParserVariant : AsmParserVariant {
126126
int Variant = 0;
127127
string Name = "generic";
128+
string BreakCharacters = ".";
128129
}
129130

130131
def AppleAsmParserVariant : AsmParserVariant {
131132
int Variant = 1;
132133
string Name = "apple-neon";
134+
string BreakCharacters = ".";
133135
}
134136

135137
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/ARM/ARM.td

+7
Original file line number | Diff line number | Diff line change
@@ -511,8 +511,15 @@ def ARMAsmWriter : AsmWriter {
511511
bit isMCAsmWriter = 1;
512512
}
513513

514+
// ARM asm parser variant. '.' is declared as a break character: when the
// asm matcher emitter tokenizes asm strings, a '.' ends the current token and
// begins the next one.
def ARMAsmParserVariant : AsmParserVariant {
515+
int Variant = 0;
516+
string Name = "ARM";
517+
string BreakCharacters = ".";
518+
}
519+
514520
def ARM : Target {
515521
// Pull in Instruction Info:
516522
let InstructionSet = ARMInstrInfo;
517523
let AssemblyWriters = [ARMAsmWriter];
524+
let AssemblyParserVariants = [ARMAsmParserVariant];
518525
}

‎llvm/lib/Target/BPF/BPF.td

+7
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,14 @@ def BPFInstPrinter : AsmWriter {
2525
bit isMCAsmWriter = 1;
2626
}
2727

28+
// BPF asm parser variant. '.' is declared as a break character: when the
// asm matcher emitter tokenizes asm strings, a '.' ends the current token and
// begins the next one.
def BPFAsmParserVariant : AsmParserVariant {
29+
int Variant = 0;
30+
string Name = "BPF";
31+
string BreakCharacters = ".";
32+
}
33+
2834
def BPF : Target {
2935
let InstructionSet = BPFInstrInfo;
3036
let AssemblyWriters = [BPFInstPrinter];
37+
let AssemblyParserVariants = [BPFAsmParserVariant];
3138
}

‎llvm/lib/Target/PowerPC/PPC.td

+1
Original file line number | Diff line number | Diff line change
@@ -403,6 +403,7 @@ def PPCAsmParserVariant : AsmParserVariant {
403403
// InstAlias definitions use immediate literals. Set RegisterPrefix
404404
// so that those are not misinterpreted as registers.
405405
string RegisterPrefix = "%";
406+
string BreakCharacters = ".";
406407
}
407408

408409
def PPC : Target {

‎llvm/utils/TableGen/AsmMatcherEmitter.cpp

+68 -47
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,13 @@ struct ClassInfo {
294294
}
295295
};
296296

297+
/// AsmVariantInfo - Character sets, read from an AsmParserVariant record,
/// that control how MatchableInfo::tokenizeAsmString splits an asm string.
class AsmVariantInfo {
298+
public:
299+
/// Characters that end the current token and are each added as their own
/// single-character operand.
std::string TokenizingCharacters;
300+
/// Characters that end the current token and are not added as operands.
std::string SeparatorCharacters;
301+
/// Characters that end the current token and become the first character of
/// the next one.
std::string BreakCharacters;
302+
};
303+
297304
/// MatchableInfo - Helper class for storing the necessary information for an
298305
/// instruction or alias which is capable of being matched.
299306
struct MatchableInfo {
@@ -484,7 +491,8 @@ struct MatchableInfo {
484491

485492
void initialize(const AsmMatcherInfo &Info,
486493
SmallPtrSetImpl<Record*> &SingletonRegisters,
487-
int AsmVariantNo, StringRef RegisterPrefix);
494+
int AsmVariantNo, StringRef RegisterPrefix,
495+
AsmVariantInfo const &Variant);
488496

489497
/// validate - Return true if this matchable is a valid thing to match against
490498
/// and perform a bunch of validity checking.
@@ -584,8 +592,10 @@ struct MatchableInfo {
584592
void dump() const;
585593

586594
private:
587-
void tokenizeAsmString(const AsmMatcherInfo &Info);
588-
void addAsmOperand(size_t Start, size_t End);
595+
void tokenizeAsmString(AsmMatcherInfo const &Info,
596+
AsmVariantInfo const &Variant);
597+
void addAsmOperand(size_t Start, size_t End,
598+
std::string const &SeparatorCharacters);
589599
};
590600

591601
/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
@@ -828,12 +838,13 @@ extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op,
828838

829839
void MatchableInfo::initialize(const AsmMatcherInfo &Info,
830840
SmallPtrSetImpl<Record*> &SingletonRegisters,
831-
int AsmVariantNo, StringRef RegisterPrefix) {
841+
int AsmVariantNo, StringRef RegisterPrefix,
842+
AsmVariantInfo const &Variant) {
832843
AsmVariantID = AsmVariantNo;
833844
AsmString =
834845
CodeGenInstruction::FlattenAsmStringVariants(AsmString, AsmVariantNo);
835846

836-
tokenizeAsmString(Info);
847+
tokenizeAsmString(Info, Variant);
837848

838849
// Compute the required features.
839850
for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates"))
@@ -857,9 +868,9 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info,
857868
}
858869

859870
/// Append an AsmOperand for the given substring of AsmString.
860-
void MatchableInfo::addAsmOperand(size_t Start, size_t End) {
871+
void MatchableInfo::addAsmOperand(size_t Start, size_t End,
872+
std::string const &Separators) {
861873
StringRef String = AsmString;
862-
StringRef Separators = "[]*! \t,";
863874
// Look for separators before and after to figure out if this token is
864875
// isolated. Accept '$$' as that's how we escape '$'.
865876
bool IsIsolatedToken =
@@ -870,42 +881,54 @@ void MatchableInfo::addAsmOperand(size_t Start, size_t End) {
870881
}
871882

872883
/// tokenizeAsmString - Tokenize a simplified assembly string.
873-
void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
884+
void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
885+
AsmVariantInfo const &Variant) {
874886
StringRef String = AsmString;
875-
size_t Prev = 0;
876-
bool InTok = true;
877-
for (size_t i = 0, e = String.size(); i != e; ++i) {
878-
switch (String[i]) {
879-
case '[':
880-
case ']':
881-
case '*':
882-
case '!':
883-
case ' ':
884-
case '\t':
885-
case ',':
886-
if (InTok) {
887-
addAsmOperand(Prev, i);
887+
unsigned Prev = 0;
888+
bool InTok = false;
889+
std::string Separators = Variant.TokenizingCharacters +
890+
Variant.SeparatorCharacters;
891+
for (unsigned i = 0, e = String.size(); i != e; ++i) {
892+
if(Variant.BreakCharacters.find(String[i]) != std::string::npos) {
893+
if(InTok) {
894+
addAsmOperand(Prev, i, Separators);
895+
Prev = i;
896+
}
897+
InTok = true;
898+
continue;
899+
}
900+
if(Variant.TokenizingCharacters.find(String[i]) != std::string::npos) {
901+
if(InTok) {
902+
addAsmOperand(Prev, i, Separators);
888903
InTok = false;
889904
}
890-
if (!isspace(String[i]) && String[i] != ',')
891-
addAsmOperand(i, i + 1);
905+
addAsmOperand(i, i + 1, Separators);
892906
Prev = i + 1;
893-
break;
894-
907+
continue;
908+
}
909+
if(Variant.SeparatorCharacters.find(String[i]) != std::string::npos) {
910+
if(InTok) {
911+
addAsmOperand(Prev, i, Separators);
912+
InTok = false;
913+
}
914+
Prev = i + 1;
915+
continue;
916+
}
917+
switch (String[i]) {
895918
case '\\':
896919
if (InTok) {
897-
addAsmOperand(Prev, i);
920+
addAsmOperand(Prev, i, Separators);
898921
InTok = false;
899922
}
900923
++i;
901924
assert(i != String.size() && "Invalid quoted character");
902-
addAsmOperand(i, i + 1);
925+
addAsmOperand(i, i + 1, Separators);
903926
Prev = i + 1;
904927
break;
905928

906929
case '$': {
907-
if (InTok) {
908-
addAsmOperand(Prev, i);
930+
if (InTok && Prev != i) {
931+
addAsmOperand(Prev, i, Separators);
909932
InTok = false;
910933
}
911934

@@ -915,31 +938,20 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
915938
break;
916939
}
917940

918-
// If this is "${" find the next "}" and make an identifier like "${xxx}"
919-
size_t EndPos = String.find('}', i);
920-
assert(EndPos != StringRef::npos &&
921-
"Missing brace in operand reference!");
922-
addAsmOperand(i, EndPos+1);
941+
StringRef::iterator End = std::find(String.begin() + i, String.end(),'}');
942+
assert(End != String.end() && "Missing brace in operand reference!");
943+
size_t EndPos = End - String.begin();
944+
addAsmOperand(i, EndPos+1, Separators);
923945
Prev = EndPos + 1;
924946
i = EndPos;
925947
break;
926948
}
927-
928-
case '.':
929-
if (!Info.AsmParser->getValueAsBit("MnemonicContainsDot")) {
930-
if (InTok)
931-
addAsmOperand(Prev, i);
932-
Prev = i;
933-
}
934-
InTok = true;
935-
break;
936-
937949
default:
938950
InTok = true;
939951
}
940952
}
941953
if (InTok && Prev != String.size())
942-
addAsmOperand(Prev, StringRef::npos);
954+
addAsmOperand(Prev, StringRef::npos, Separators);
943955

944956
// The first token of the instruction is the mnemonic, which must be a
945957
// simple string, not a $foo variable or a singleton register.
@@ -1373,6 +1385,13 @@ void AsmMatcherInfo::buildInfo() {
13731385
std::string CommentDelimiter =
13741386
AsmVariant->getValueAsString("CommentDelimiter");
13751387
std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix");
1388+
AsmVariantInfo Variant;
1389+
Variant.TokenizingCharacters =
1390+
AsmVariant->getValueAsString("TokenizingCharacters");
1391+
Variant.SeparatorCharacters =
1392+
AsmVariant->getValueAsString("SeparatorCharacters");
1393+
Variant.BreakCharacters =
1394+
AsmVariant->getValueAsString("BreakCharacters");
13761395
int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
13771396

13781397
for (const CodeGenInstruction *CGI : Target.instructions()) {
@@ -1388,7 +1407,8 @@ void AsmMatcherInfo::buildInfo() {
13881407

13891408
auto II = llvm::make_unique<MatchableInfo>(*CGI);
13901409

1391-
II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
1410+
II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix,
1411+
Variant);
13921412

13931413
// Ignore instructions which shouldn't be matched and diagnose invalid
13941414
// instruction definitions with an error.
@@ -1415,7 +1435,8 @@ void AsmMatcherInfo::buildInfo() {
14151435

14161436
auto II = llvm::make_unique<MatchableInfo>(std::move(Alias));
14171437

1418-
II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
1438+
II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix,
1439+
Variant);
14191440

14201441
// Validate the alias definitions.
14211442
II->validate(CommentDelimiter, false);

0 commit comments

Comments
 (0)
Please sign in to comment.