diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -648,6 +648,7 @@ include "AArch64InstrFormats.td" include "SVEInstrFormats.td" +include "SMEInstrFormats.td" //===----------------------------------------------------------------------===// @@ -8115,5 +8116,5 @@ include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" - +include "AArch64SMEInstrInfo.td" include "AArch64InstrGISel.td" diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -45,6 +45,16 @@ def qsub1 : SubRegIndex<128>; def qsub2 : SubRegIndex<128>; def qsub3 : SubRegIndex<128>; + // Note: Code depends on these having consecutive numbers + def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits + def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits + def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits + def zasubs0 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits + def zasubs1 : SubRegIndex<512>; // (16 x 16)/4 bytes = 512 bits + def zasubd0 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits + def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits + def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits + def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits } let Namespace = "AArch64" in { @@ -1156,3 +1166,188 @@ def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">; def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>; } + +// Accumulator array tiles. 
+def ZAQ0 : AArch64Reg<0, "za0.q">;
+def ZAQ1 : AArch64Reg<1, "za1.q">;
+def ZAQ2 : AArch64Reg<2, "za2.q">;
+def ZAQ3 : AArch64Reg<3, "za3.q">;
+def ZAQ4 : AArch64Reg<4, "za4.q">;
+def ZAQ5 : AArch64Reg<5, "za5.q">;
+def ZAQ6 : AArch64Reg<6, "za6.q">;
+def ZAQ7 : AArch64Reg<7, "za7.q">;
+def ZAQ8 : AArch64Reg<8, "za8.q">;
+def ZAQ9 : AArch64Reg<9, "za9.q">;
+def ZAQ10 : AArch64Reg<10, "za10.q">;
+def ZAQ11 : AArch64Reg<11, "za11.q">;
+def ZAQ12 : AArch64Reg<12, "za12.q">;
+def ZAQ13 : AArch64Reg<13, "za13.q">;
+def ZAQ14 : AArch64Reg<14, "za14.q">;
+def ZAQ15 : AArch64Reg<15, "za15.q">;
+
+let SubRegIndices = [zasubq0, zasubq1] in {
+  def ZAD0 : AArch64Reg<0, "za0.d", [ZAQ0, ZAQ8]>;
+  def ZAD1 : AArch64Reg<1, "za1.d", [ZAQ1, ZAQ9]>;
+  def ZAD2 : AArch64Reg<2, "za2.d", [ZAQ2, ZAQ10]>;
+  def ZAD3 : AArch64Reg<3, "za3.d", [ZAQ3, ZAQ11]>;
+  def ZAD4 : AArch64Reg<4, "za4.d", [ZAQ4, ZAQ12]>;
+  def ZAD5 : AArch64Reg<5, "za5.d", [ZAQ5, ZAQ13]>;
+  def ZAD6 : AArch64Reg<6, "za6.d", [ZAQ6, ZAQ14]>;
+  def ZAD7 : AArch64Reg<7, "za7.d", [ZAQ7, ZAQ15]>;
+}
+
+let SubRegIndices = [zasubd0, zasubd1] in {
+  def ZAS0 : AArch64Reg<0, "za0.s", [ZAD0, ZAD4]>;
+  def ZAS1 : AArch64Reg<1, "za1.s", [ZAD1, ZAD5]>;
+  def ZAS2 : AArch64Reg<2, "za2.s", [ZAD2, ZAD6]>;
+  def ZAS3 : AArch64Reg<3, "za3.s", [ZAD3, ZAD7]>;
+}
+
+let SubRegIndices = [zasubs0, zasubs1] in {
+  def ZAH0 : AArch64Reg<0, "za0.h", [ZAS0, ZAS2]>;
+  def ZAH1 : AArch64Reg<1, "za1.h", [ZAS1, ZAS3]>;
+}
+
+let SubRegIndices = [zasubh0, zasubh1] in {
+  def ZAB0 : AArch64Reg<0, "za0.b", [ZAH0, ZAH1]>;
+}
+
+let SubRegIndices = [zasubb] in {
+  def ZA : AArch64Reg<0, "za", [ZAB0]>;
+}
+
+// SME Register Classes
+
+// Accumulator array
+def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
+  let Size = 2048;
+}
+
+// Accumulator array as single tiles
+def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
+  let Size = 2048;
+}
+def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
+  let Size = 1024;
+}
+def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
+  let Size = 512;
+}
+def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
+  let Size = 256;
+}
+def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
+  let Size = 128;
+}
+
+// SME Register Operands
+// There are three types of SME matrix register operands:
+// * Tiles:
+//
+//   These tiles make up the larger accumulator matrix. The tile representation
+//   has an element type suffix, e.g. za0.b or za15.q, and can be any of the
+//   registers:
+//          ZAQ0..ZAQ15
+//          ZAD0..ZAD7
+//          ZAS0..ZAS3
+//          ZAH0..ZAH1
+//       or ZAB0
+//
+// * Tile vectors:
+//
+//   Their representation is similar to regular tiles, but they have an extra
+//   'h' or 'v' to tell how the vector at [reg+offset] is laid out in the tile,
+//   horizontally or vertically.
+//
+//   e.g. za1h.h or za15v.q, which corresponds to vectors in registers ZAH1 and
+//   ZAQ15, respectively. The horizontal/vertical is more a property of the
+//   instruction than of the asm-operand itself or its register. The distinction
+//   is required for parsing and printing the operand, as from a compiler's
+//   perspective the whole tile is read/written.
+//
+// * Accumulator matrix:
+//
+//   This is the entire matrix accumulator register ZA (<=> ZAB0), printed as
+//   'za'.
+
+//
+// Tiles
+//
+
+class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "MatrixTile" # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Tile" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTile";
+}
+
+def TileOp32 : MatrixTileOperand<32, 2, MPR32>;
+def TileOp64 : MatrixTileOperand<64, 3, MPR64>;
+
+//
+// Tile vectors (horizontal and vertical)
+//
+
+class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
+    : AsmOperandClass {
+  let Name = "MatrixTileVector" # !if(IsVertical, "V", "H") # EltSize;
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::"
+                          # !if(IsVertical, "Col", "Row") # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
+                              RegisterClass RC, int IsVertical>
+    : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
+                                                    IsVertical>;
+  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
+  let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
+}
+
+def TileVectorOpH8   : MatrixTileVectorOperand<  8, 0, MPR8,   0>;
+def TileVectorOpH16  : MatrixTileVectorOperand< 16, 1, MPR16,  0>;
+def TileVectorOpH32  : MatrixTileVectorOperand< 32, 2, MPR32,  0>;
+def TileVectorOpH64  : MatrixTileVectorOperand< 64, 3, MPR64,  0>;
+def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
+
+def TileVectorOpV8   : MatrixTileVectorOperand<  8, 0, MPR8,   1>;
+def TileVectorOpV16  : MatrixTileVectorOperand< 16, 1, MPR16,  1>;
+def TileVectorOpV32  : MatrixTileVectorOperand< 32, 2, MPR32,  1>;
+def TileVectorOpV64  : MatrixTileVectorOperand< 64, 3, MPR64,  1>;
+def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
+
+//
+// Accumulator matrix
+//
+
+class MatrixAsmOperand<string RC, int EltSize> : AsmOperandClass {
+  let Name = "Matrix";
+  let DiagnosticType = "Invalid" # Name;
+  let ParserMethod = "tryParseMatrixRegister";
+  let RenderMethod = "addMatrixOperands";
+  let PredicateMethod = "isMatrixRegOperand<"
+                          # "MatrixKind::Array" # ", "
+                          # EltSize # ", AArch64::" # RC # "RegClassID>";
+}
+
+class MatrixOperand<RegisterClass RC, int EltSize> : RegisterOperand<RC> {
+  let ParserMatchClass = MatrixAsmOperand<!cast<string>(RC), EltSize>;
+  let PrintMethod = "printMatrix<" # EltSize # ">";
+}
+
+def MatrixOp : MatrixOperand<MPR, 0>;
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -0,0 +1,25 @@
+//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Add vector elements horizontally or vertically to ZA tile.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSME] in {
+def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
+def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
+}
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">;
+def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">;
+}
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -66,9 +66,12 @@
   Scalar,
   NeonVector,
   SVEDataVector,
-  SVEPredicateVector
+  SVEPredicateVector,
+  Matrix
 };
 
+enum class MatrixKind { Array, Tile, Row, Col };
+
 enum RegConstraintEqualityTy {
   EqualsReg,
   EqualsSuperReg,
@@ -229,6 +232,7 @@
   OperandMatchResultTy tryParseScalarRegister(unsigned &Reg);
   OperandMatchResultTy tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
                                               RegKind MatchKind);
+  OperandMatchResultTy tryParseMatrixRegister(OperandVector &Operands);
   OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
   OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
   OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
@@ -316,6 +320,7 @@
     k_ShiftedImm,
     k_CondCode,
     k_Register,
+    k_MatrixRegister,
     k_VectorList,
     k_VectorIndex,
     k_Token,
@@ -370,6 +375,12 @@
     ShiftExtendOp ShiftExtend;
   };
 
+  struct MatrixRegOp {
+    unsigned RegNum;
+    unsigned ElementWidth;
+    MatrixKind Kind;
+  };
+
   struct VectorListOp {
     unsigned RegNum;
     unsigned Count;
@@ -440,6 +451,7 @@
   union {
     struct TokOp Tok;
     struct RegOp Reg;
+    struct MatrixRegOp MatrixReg;
     struct VectorListOp VectorList;
     struct VectorIndexOp VectorIndex;
     struct ImmOp Imm;
@@ -488,6 +500,9 @@
     case k_Register:
       Reg = o.Reg;
      break;
+    case k_MatrixRegister:
+      MatrixReg = o.MatrixReg;
+      break;
     case k_VectorList:
      VectorList = o.VectorList;
      break;
@@ -580,6 +595,21 @@
     return Reg.RegNum;
   }
 
+  unsigned getMatrixReg() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.RegNum;
+  }
+
+  unsigned getMatrixElementWidth() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.ElementWidth;
+  }
+
+  MatrixKind getMatrixKind() const {
+    assert(Kind == k_MatrixRegister && "Invalid access!");
+    return MatrixReg.Kind;
+  }
+
   RegConstraintEqualityTy getRegEqualityTy() const {
     assert(Kind == k_Register && "Invalid access!");
     return Reg.EqualityTy;
@@ -1089,6 +1119,8 @@
                                    Reg.RegNum));
   }
 
+  bool isMatrix() const { return Kind == k_MatrixRegister; }
+
   template <RegKind VectorKind, unsigned Class> bool isSVEVectorReg() const {
     RegKind RK;
     switch (Class) {
@@ -1470,6 +1502,15 @@
     return true;
   }
 
+  template <MatrixKind Kind, unsigned EltSize, unsigned RegClass>
+  DiagnosticPredicate isMatrixRegOperand() const {
+    if (isMatrix() && getMatrixKind() == Kind &&
+        AArch64MCRegisterClasses[RegClass].contains(getMatrixReg()) &&
+        EltSize == getMatrixElementWidth())
+      return DiagnosticPredicateTy::Match;
+    return DiagnosticPredicateTy::NoMatch;
+  }
+
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
     // Add as immediates when possible.  Null MCExpr = 0.
     if (!Expr)
@@ -1485,6 +1526,11 @@
     Inst.addOperand(MCOperand::createReg(getReg()));
   }
 
+  void addMatrixOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getMatrixReg()));
+  }
+
   void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     assert(
@@ -2054,6 +2100,18 @@
     return Op;
   }
 
+  static std::unique_ptr<AArch64Operand>
+  CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind,
+                       SMLoc S, SMLoc E, MCContext &Ctx) {
+    auto Op = std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx);
+    Op->MatrixReg.RegNum = RegNum;
+    Op->MatrixReg.ElementWidth = ElementWidth;
+    Op->MatrixReg.Kind = Kind;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
   static std::unique_ptr<AArch64Operand>
   CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
                     bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@@ -2132,6 +2190,9 @@
   case k_BTIHint:
     OS << getBTIHintName();
     break;
+  case k_MatrixRegister:
+    OS << "<matrix " << getMatrixReg() << ">";
+    break;
   case k_Register:
     OS << "<register " << getReg() << ">";
     if (!getShiftExtendAmount() && !hasShiftExtendAmount())
@@ -2229,6 +2290,7 @@
     break;
   case RegKind::SVEPredicateVector:
   case RegKind::SVEDataVector:
+  case RegKind::Matrix:
     Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
              .Case("", {0, 0})
              .Case(".b", {0, 8})
@@ -2310,6 +2372,105 @@
       .Default(0);
 }
 
+static unsigned matchMatrixRegName(StringRef Name) {
+  return StringSwitch<unsigned>(Name.lower())
+      .Case("za", AArch64::ZA)
+      .Case("za0.q", AArch64::ZAQ0)
+      .Case("za1.q", AArch64::ZAQ1)
+      .Case("za2.q", AArch64::ZAQ2)
+      .Case("za3.q", AArch64::ZAQ3)
+      .Case("za4.q", AArch64::ZAQ4)
+      .Case("za5.q", AArch64::ZAQ5)
+      .Case("za6.q", AArch64::ZAQ6)
+      .Case("za7.q", AArch64::ZAQ7)
+      .Case("za8.q", AArch64::ZAQ8)
+      .Case("za9.q", AArch64::ZAQ9)
+      .Case("za10.q", AArch64::ZAQ10)
+      .Case("za11.q", AArch64::ZAQ11)
+      .Case("za12.q", AArch64::ZAQ12)
+      .Case("za13.q", AArch64::ZAQ13)
+      .Case("za14.q", AArch64::ZAQ14)
+      .Case("za15.q", AArch64::ZAQ15)
+      .Case("za0.d", AArch64::ZAD0)
+      .Case("za1.d", AArch64::ZAD1)
+      .Case("za2.d", AArch64::ZAD2)
+      .Case("za3.d", AArch64::ZAD3)
+      .Case("za4.d", AArch64::ZAD4)
+      .Case("za5.d", AArch64::ZAD5)
+      .Case("za6.d", AArch64::ZAD6)
+      .Case("za7.d", AArch64::ZAD7)
+      .Case("za0.s", AArch64::ZAS0)
+      .Case("za1.s", AArch64::ZAS1)
+      .Case("za2.s", AArch64::ZAS2)
+      .Case("za3.s", AArch64::ZAS3)
+      .Case("za0.h", AArch64::ZAH0)
+      .Case("za1.h", AArch64::ZAH1)
+      .Case("za0.b", AArch64::ZAB0)
+      .Case("za0h.q", AArch64::ZAQ0)
+      .Case("za1h.q", AArch64::ZAQ1)
+      .Case("za2h.q", AArch64::ZAQ2)
+      .Case("za3h.q", AArch64::ZAQ3)
+      .Case("za4h.q", AArch64::ZAQ4)
+      .Case("za5h.q", AArch64::ZAQ5)
+      .Case("za6h.q", AArch64::ZAQ6)
+      .Case("za7h.q", AArch64::ZAQ7)
+      .Case("za8h.q", AArch64::ZAQ8)
+      .Case("za9h.q", AArch64::ZAQ9)
+      .Case("za10h.q", AArch64::ZAQ10)
+      .Case("za11h.q", AArch64::ZAQ11)
+      .Case("za12h.q", AArch64::ZAQ12)
+      .Case("za13h.q", AArch64::ZAQ13)
+      .Case("za14h.q", AArch64::ZAQ14)
+      .Case("za15h.q", AArch64::ZAQ15)
+      .Case("za0h.d", AArch64::ZAD0)
+      .Case("za1h.d", AArch64::ZAD1)
+      .Case("za2h.d", AArch64::ZAD2)
+      .Case("za3h.d", AArch64::ZAD3)
+      .Case("za4h.d", AArch64::ZAD4)
+      .Case("za5h.d", AArch64::ZAD5)
+      .Case("za6h.d", AArch64::ZAD6)
+      .Case("za7h.d", AArch64::ZAD7)
+      .Case("za0h.s", AArch64::ZAS0)
+      .Case("za1h.s", AArch64::ZAS1)
+      .Case("za2h.s", AArch64::ZAS2)
+      .Case("za3h.s", AArch64::ZAS3)
+      .Case("za0h.h", AArch64::ZAH0)
+      .Case("za1h.h", AArch64::ZAH1)
+      .Case("za0h.b", AArch64::ZAB0)
+      .Case("za0v.q", AArch64::ZAQ0)
+      .Case("za1v.q", AArch64::ZAQ1)
+      .Case("za2v.q", AArch64::ZAQ2)
+      .Case("za3v.q", AArch64::ZAQ3)
+      .Case("za4v.q", AArch64::ZAQ4)
+      .Case("za5v.q", AArch64::ZAQ5)
+      .Case("za6v.q", AArch64::ZAQ6)
+      .Case("za7v.q", AArch64::ZAQ7)
+      .Case("za8v.q", AArch64::ZAQ8)
+      .Case("za9v.q", AArch64::ZAQ9)
+      .Case("za10v.q", AArch64::ZAQ10)
+      .Case("za11v.q", AArch64::ZAQ11)
+      .Case("za12v.q", AArch64::ZAQ12)
+      .Case("za13v.q", AArch64::ZAQ13)
+      .Case("za14v.q", AArch64::ZAQ14)
+      .Case("za15v.q", AArch64::ZAQ15)
+      .Case("za0v.d", AArch64::ZAD0)
+      .Case("za1v.d", AArch64::ZAD1)
+      .Case("za2v.d", AArch64::ZAD2)
+      .Case("za3v.d", AArch64::ZAD3)
+      .Case("za4v.d", AArch64::ZAD4)
+      .Case("za5v.d", AArch64::ZAD5)
+      .Case("za6v.d", AArch64::ZAD6)
+      .Case("za7v.d", AArch64::ZAD7)
+      .Case("za0v.s", AArch64::ZAS0)
+      .Case("za1v.s", AArch64::ZAS1)
+      .Case("za2v.s", AArch64::ZAS2)
+      .Case("za3v.s", AArch64::ZAS3)
+      .Case("za0v.h", AArch64::ZAH0)
+      .Case("za1v.h", AArch64::ZAH1)
+      .Case("za0v.b", AArch64::ZAB0)
+      .Default(0);
+}
+
 bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                      SMLoc &EndLoc) {
   return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
 }
@@ -2337,6 +2498,9 @@
   if ((RegNum = MatchNeonVectorRegName(Name)))
     return Kind == RegKind::NeonVector ? RegNum : 0;
 
+  if ((RegNum = matchMatrixRegName(Name)))
+    return Kind == RegKind::Matrix ? RegNum : 0;
+
   // The parsed register must be of RegKind Scalar
   if ((RegNum = MatchRegisterName(Name)))
     return Kind == RegKind::Scalar ? RegNum : 0;
@@ -2809,6 +2973,54 @@
   return false;
 }
 
+OperandMatchResultTy
+AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc S = getLoc();
+
+  StringRef Name = Tok.getString();
+
+  if (Name.equals_insensitive("za")) {
+    Parser.Lex(); // eat "za"
+    Operands.push_back(AArch64Operand::CreateMatrixRegister(
+        AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
+        getContext()));
+    return MatchOperand_Success;
+  }
+
+  // Try to parse matrix register.
+  unsigned Reg = matchRegisterNameAlias(Name, RegKind::Matrix);
+  if (!Reg)
+    return MatchOperand_NoMatch;
+
+  size_t DotPosition = Name.find('.');
+  assert(DotPosition != StringRef::npos && "Unexpected register");
+
+  StringRef Head = Name.take_front(DotPosition);
+  StringRef Tail = Name.drop_front(DotPosition);
+  StringRef RowOrColumn = Head.take_back();
+
+  MatrixKind Kind = StringSwitch<MatrixKind>(RowOrColumn)
+                        .Case("h", MatrixKind::Row)
+                        .Case("v", MatrixKind::Col)
+                        .Default(MatrixKind::Tile);
+
+  // Next up, parsing the suffix
+  const auto &KindRes = parseVectorKind(Tail, RegKind::Matrix);
+  if (!KindRes) {
+    TokError("Expected the register to be followed by element width suffix");
+    return MatchOperand_ParseFail;
+  }
+  unsigned ElementWidth = KindRes->second;
+
+  Parser.Lex();
+
+  Operands.push_back(AArch64Operand::CreateMatrixRegister(
+      Reg, ElementWidth, Kind, S, getLoc(), getContext()));
+  return MatchOperand_Success;
+}
+
 /// tryParseOptionalShift - Some operands take an optional shift argument. Parse
 /// them if present.
 OperandMatchResultTy
@@ -4733,6 +4945,32 @@
     return Error(Loc, "Invalid floating point constant, expected 0.5 or 2.0.");
   case Match_InvalidSVEExactFPImmOperandZeroOne:
     return Error(Loc, "Invalid floating point constant, expected 0.0 or 1.0.");
+  case Match_InvalidMatrixTileVectorH8:
+    return Error(Loc, "invalid matrix operand, expected za0h.b");
+  case Match_InvalidMatrixTileVectorH16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1]h.h");
+  case Match_InvalidMatrixTileVectorH32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3]h.s");
+  case Match_InvalidMatrixTileVectorH64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7]h.d");
+  case Match_InvalidMatrixTileVectorH128:
+    return Error(Loc, "invalid matrix operand, expected za[0-15]h.q");
+  case Match_InvalidMatrixTileVectorV8:
+    return Error(Loc, "invalid matrix operand, expected za0v.b");
+  case Match_InvalidMatrixTileVectorV16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1]v.h");
+  case Match_InvalidMatrixTileVectorV32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3]v.s");
+  case Match_InvalidMatrixTileVectorV64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7]v.d");
+  case Match_InvalidMatrixTileVectorV128:
+    return Error(Loc, "invalid matrix operand, expected za[0-15]v.q");
+  case Match_InvalidMatrixTile32:
+    return Error(Loc, "invalid matrix operand, expected za[0-3].s");
+  case Match_InvalidMatrixTile64:
+    return Error(Loc, "invalid matrix operand, expected za[0-7].d");
+  case Match_InvalidMatrix:
+    return Error(Loc, "invalid matrix operand, expected za");
   default:
     llvm_unreachable("unexpected error code!");
   }
@@ -5251,6 +5489,19 @@
   case Match_InvalidSVEExactFPImmOperandHalfOne:
   case Match_InvalidSVEExactFPImmOperandHalfTwo:
   case Match_InvalidSVEExactFPImmOperandZeroOne:
+  case Match_InvalidMatrixTile32:
+  case Match_InvalidMatrixTile64:
+  case Match_InvalidMatrix:
+  case Match_InvalidMatrixTileVectorH8:
+  case Match_InvalidMatrixTileVectorH16:
+  case Match_InvalidMatrixTileVectorH32:
+  case Match_InvalidMatrixTileVectorH64:
+  case Match_InvalidMatrixTileVectorH128:
+  case Match_InvalidMatrixTileVectorV8:
+  case Match_InvalidMatrixTileVectorV16:
+  case Match_InvalidMatrixTileVectorV32:
+  case Match_InvalidMatrixTileVectorV64:
+  case Match_InvalidMatrixTileVectorV128:
   case Match_MSR:
   case Match_MRS: {
     if (ErrorInfo >= Operands.size())
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -111,6 +111,9 @@
 static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+                                     uint64_t Address, const void *Decoder);
 static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
                                            const void *Decoder);
@@ -642,6 +645,29 @@
   return Success;
 }
 
+static const SmallVector<SmallVector<unsigned, 16>, 5>
+    MatrixZATileDecoderTable = {
+        {AArch64::ZAB0},
+        {AArch64::ZAH0, AArch64::ZAH1},
+        {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
+        {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
+         AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
+        {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3,
+         AArch64::ZAQ4, AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7,
+         AArch64::ZAQ8, AArch64::ZAQ9, AArch64::ZAQ10, AArch64::ZAQ11,
+         AArch64::ZAQ12, AArch64::ZAQ13, AArch64::ZAQ14, AArch64::ZAQ15}};
+
+template <unsigned NumBitsForTile>
+static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
+                                     uint64_t Address, const void *Decoder) {
+  unsigned LastReg = (1 << NumBitsForTile) - 1;
+  if (RegNo > LastReg)
+    return Fail;
+  Inst.addOperand(
+      MCOperand::createReg(MatrixZATileDecoderTable[NumBitsForTile][RegNo]));
+  return Success;
+}
+
 static const unsigned PPRDecoderTable[] = {
     AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3,
     AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -187,6 +187,15 @@
                         const MCSubtargetInfo &STI, raw_ostream &O);
   void printSVEPattern(const MCInst *MI, unsigned OpNum,
                        const MCSubtargetInfo &STI, raw_ostream &O);
+
+  template <bool IsVertical>
+  void printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMatrixTile(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  template <int EltSize>
+  void printMatrix(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
   template <char = 0>
   void printSVERegOp(const MCInst *MI, unsigned OpNum,
                      const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -880,6 +880,59 @@
   return true;
 }
 
+template <int EltSize>
+void AArch64InstPrinter::printMatrix(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+
+  O << getRegisterName(RegOp.getReg());
+  switch (EltSize) {
+  case 0:
+    break;
+  case 8:
+    O << ".b";
+    break;
+  case 16:
+    O << ".h";
+    break;
+  case 32:
+    O << ".s";
+    break;
+  case 64:
+    O << ".d";
+    break;
+  case 128:
+    O << ".q";
+    break;
+  default:
+    llvm_unreachable("Unsupported element size");
+  }
+}
+
+template <bool IsVertical>
+void AArch64InstPrinter::printMatrixTileVector(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+  StringRef RegName = getRegisterName(RegOp.getReg());
+
+  // Insert the horizontal/vertical flag before the suffix.
+  StringRef Base, Suffix;
+  std::tie(Base, Suffix) = RegName.split('.');
+  O << Base << (IsVertical ? "v" : "h") << '.' << Suffix;
+}
+
+void AArch64InstPrinter::printMatrixTile(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  assert(RegOp.isReg() && "Unexpected operand type!");
+  O << getRegisterName(RegOp.getReg());
+}
+
 void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O) {
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -0,0 +1,47 @@
+//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SME Add Vector to Tile
+//===----------------------------------------------------------------------===//
+
+class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
+                                  ZPRRegOp zpr_ty, string mnemonic>
+    : I<(outs tile_ty:$ZAda),
+        (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
+        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
+        "", []>, Sched<[]> {
+  bits<3> Pm;
+  bits<3> Pn;
+  bits<5> Zn;
+  let Inst{31-23} = 0b110000001;
+  let Inst{22}    = op;
+  let Inst{21-17} = 0b01000;
+  let Inst{16}    = V;
+  let Inst{15-13} = Pm;
+  let Inst{12-10} = Pn;
+  let Inst{9-5}   = Zn;
+  let Inst{4-3}   = 0b00;
+}
+
+class sme_add_vector_to_tile_u32<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
+  bits<2> ZAda;
+  let Inst{2}   = 0b0;
+  let Inst{1-0} = ZAda;
+}
+
+class sme_add_vector_to_tile_u64<bit V, string mnemonic>
+    : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
+  bits<3> ZAda;
+  let Inst{2-0} = ZAda;
+}
diff --git a/llvm/test/MC/AArch64/SME/addha-diagnostics.s b/llvm/test/MC/AArch64/SME/addha-diagnostics.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/addha-diagnostics.s
@@ -0,0 +1,52 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid tile
+
+addha za4.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za4.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za8.d, p0/m, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za8.d, p0/m, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0h.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0h.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0v.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0v.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addha za0p.s, p0/m, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: addha za0p.s, p0/m, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid
predicate + +addha za0.s, p8/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.s, p8/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addha za0.s, p0/m, p8/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.s, p0/m, p8/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addha za0.d, p8/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.d, p8/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addha za0.d, p0/m, p8/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addha za0.d, p0/m, p8/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/addha-u32.s b/llvm/test/MC/AArch64/SME/addha-u32.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addha-u32.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addha za0.s, p0/m, p0/m, z0.s +// CHECK-INST: addha za0.s, p0/m, p0/m, z0.s +// CHECK-ENCODING: [0x00,0x00,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 00 00 90 c0 + +addha za1.s, p5/m, p2/m, z10.s +// CHECK-INST: addha za1.s, p5/m, p2/m, z10.s +// CHECK-ENCODING: [0x41,0x55,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 41 55 90 c0 + +addha za3.s, p3/m, p7/m, z13.s +// CHECK-INST: addha za3.s, p3/m, p7/m, z13.s +// CHECK-ENCODING: [0xa3,0xed,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: a3 ed 90 c0 + +addha za3.s, p7/m, p7/m, z31.s +// CHECK-INST: addha za3.s, p7/m, p7/m, z31.s +// CHECK-ENCODING: [0xe3,0xff,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: e3 ff 90 c0 + +addha za1.s, p3/m, p0/m, z17.s +// CHECK-INST: addha za1.s, p3/m, p0/m, z17.s +// CHECK-ENCODING: [0x21,0x0e,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 0e 90 c0 + +addha za1.s, p1/m, p4/m, z1.s +// CHECK-INST: addha za1.s, p1/m, p4/m, z1.s +// CHECK-ENCODING: [0x21,0x84,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 84 90 c0 + +addha za0.s, p5/m, p2/m, z19.s +// CHECK-INST: addha za0.s, p5/m, p2/m, z19.s +// CHECK-ENCODING: [0x60,0x56,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 60 56 90 c0 + +addha za0.s, p6/m, p0/m, z12.s +// CHECK-INST: addha za0.s, p6/m, p0/m, z12.s +// CHECK-ENCODING: [0x80,0x19,0x90,0xc0] +// CHECK-ERROR: 
instruction requires: sme +// CHECK-UNKNOWN: 80 19 90 c0 + +addha za1.s, p2/m, p6/m, z1.s +// CHECK-INST: addha za1.s, p2/m, p6/m, z1.s +// CHECK-ENCODING: [0x21,0xc8,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 c8 90 c0 + +addha za1.s, p2/m, p0/m, z22.s +// CHECK-INST: addha za1.s, p2/m, p0/m, z22.s +// CHECK-ENCODING: [0xc1,0x0a,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: c1 0a 90 c0 + +addha za2.s, p5/m, p7/m, z9.s +// CHECK-INST: addha za2.s, p5/m, p7/m, z9.s +// CHECK-ENCODING: [0x22,0xf5,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 22 f5 90 c0 + +addha za3.s, p2/m, p5/m, z12.s +// CHECK-INST: addha za3.s, p2/m, p5/m, z12.s +// CHECK-ENCODING: [0x83,0xa9,0x90,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 83 a9 90 c0 diff --git a/llvm/test/MC/AArch64/SME/addha-u64.s b/llvm/test/MC/AArch64/SME/addha-u64.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addha-u64.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addha za0.d, p0/m, p0/m, z0.d +// CHECK-INST: addha za0.d, p0/m, p0/m, z0.d +// CHECK-ENCODING: [0x00,0x00,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 00 00 d0 c0 + +addha za5.d, p5/m, p2/m, z10.d +// CHECK-INST: addha za5.d, p5/m, p2/m, z10.d +// CHECK-ENCODING: [0x45,0x55,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 45 55 d0 c0 + +addha za7.d, p3/m, p7/m, z13.d +// CHECK-INST: addha za7.d, p3/m, p7/m, z13.d +// CHECK-ENCODING: [0xa7,0xed,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: a7 ed d0 c0 + +addha za7.d, p7/m, p7/m, z31.d +// CHECK-INST: addha za7.d, p7/m, p7/m, z31.d +// CHECK-ENCODING: [0xe7,0xff,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: e7 ff d0 c0 + +addha za5.d, p3/m, p0/m, z17.d +// CHECK-INST: addha za5.d, p3/m, p0/m, z17.d +// CHECK-ENCODING: [0x25,0x0e,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 25 0e d0 c0 + +addha za1.d, p1/m, p4/m, z1.d +// CHECK-INST: addha za1.d, p1/m, p4/m, z1.d +// CHECK-ENCODING: [0x21,0x84,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 84 d0 c0 + +addha za0.d, p5/m, p2/m, z19.d +// CHECK-INST: addha za0.d, p5/m, p2/m, z19.d +// CHECK-ENCODING: [0x60,0x56,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 60 56 d0 c0 + +addha za0.d, p6/m, p0/m, z12.d +// CHECK-INST: addha za0.d, p6/m, p0/m, z12.d +// CHECK-ENCODING: [0x80,0x19,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 80 19 d0 c0 + +addha za1.d, p2/m, 
p6/m, z1.d +// CHECK-INST: addha za1.d, p2/m, p6/m, z1.d +// CHECK-ENCODING: [0x21,0xc8,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 21 c8 d0 c0 + +addha za5.d, p2/m, p0/m, z22.d +// CHECK-INST: addha za5.d, p2/m, p0/m, z22.d +// CHECK-ENCODING: [0xc5,0x0a,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: c5 0a d0 c0 + +addha za2.d, p5/m, p7/m, z9.d +// CHECK-INST: addha za2.d, p5/m, p7/m, z9.d +// CHECK-ENCODING: [0x22,0xf5,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 22 f5 d0 c0 + +addha za7.d, p2/m, p5/m, z12.d +// CHECK-INST: addha za7.d, p2/m, p5/m, z12.d +// CHECK-ENCODING: [0x87,0xa9,0xd0,0xc0] +// CHECK-ERROR: instruction requires: sme-i64 +// CHECK-UNKNOWN: 87 a9 d0 c0 diff --git a/llvm/test/MC/AArch64/SME/addva-diagnostics.s b/llvm/test/MC/AArch64/SME/addva-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addva-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+sme-i64 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid tile + +addva za4.s, p0/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: addva za4.s, p0/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za8.d, p0/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: addva za8.d, p0/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +addva za0.s, p8/m, p0/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.s, p8/m, p0/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.s, p0/m, p8/m, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.s, p0/m, p8/m, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.d, p8/m, p0/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.d, p8/m, p0/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +addva za0.d, p0/m, p8/m, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: addva za0.d, p0/m, p8/m, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/addva-u32.s b/llvm/test/MC/AArch64/SME/addva-u32.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addva-u32.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +addva za0.s, p0/m, p0/m, z0.s +// CHECK-INST: addva za0.s, p0/m, p0/m, z0.s +// CHECK-ENCODING: [0x00,0x00,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 00 00 91 c0 + +addva za1.s, p5/m, p2/m, z10.s +// CHECK-INST: addva za1.s, p5/m, p2/m, z10.s +// CHECK-ENCODING: [0x41,0x55,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 41 55 91 c0 + +addva za3.s, p3/m, p7/m, z13.s +// CHECK-INST: addva za3.s, p3/m, p7/m, z13.s +// CHECK-ENCODING: [0xa3,0xed,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: a3 ed 91 c0 + +addva za3.s, p7/m, p7/m, z31.s +// CHECK-INST: addva za3.s, p7/m, p7/m, z31.s +// CHECK-ENCODING: [0xe3,0xff,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: e3 ff 91 c0 + +addva za1.s, p3/m, p0/m, z17.s +// CHECK-INST: addva za1.s, p3/m, p0/m, z17.s +// CHECK-ENCODING: [0x21,0x0e,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 0e 91 c0 + +addva za1.s, p1/m, p4/m, z1.s +// CHECK-INST: addva za1.s, p1/m, p4/m, z1.s +// CHECK-ENCODING: [0x21,0x84,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 84 91 c0 + +addva za0.s, p5/m, p2/m, z19.s +// CHECK-INST: addva za0.s, p5/m, p2/m, z19.s +// CHECK-ENCODING: [0x60,0x56,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 60 56 91 c0 + +addva za0.s, p6/m, p0/m, z12.s +// CHECK-INST: addva za0.s, p6/m, p0/m, z12.s +// CHECK-ENCODING: [0x80,0x19,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 80 19 91 c0 + +addva za1.s, p2/m, p6/m, z1.s +// CHECK-INST: addva za1.s, p2/m, p6/m, z1.s +// CHECK-ENCODING: [0x21,0xc8,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 21 c8 91 c0 + +addva za1.s, p2/m, p0/m, z22.s +// CHECK-INST: addva za1.s, p2/m, p0/m, z22.s +// CHECK-ENCODING: [0xc1,0x0a,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: c1 0a 91 c0 + +addva za2.s, p5/m, p7/m, z9.s +// CHECK-INST: addva za2.s, p5/m, p7/m, z9.s +// CHECK-ENCODING: [0x22,0xf5,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 22 f5 91 c0 + +addva za3.s, p2/m, p5/m, z12.s +// CHECK-INST: addva za3.s, p2/m, p5/m, z12.s +// CHECK-ENCODING: [0x83,0xa9,0x91,0xc0] +// CHECK-ERROR: instruction requires: sme +// CHECK-UNKNOWN: 83 a9 91 c0 diff --git a/llvm/test/MC/AArch64/SME/addva-u64.s b/llvm/test/MC/AArch64/SME/addva-u64.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME/addva-u64.s @@ -0,0 +1,85 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-i64 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme-i64 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+addva za0.d, p0/m, p0/m, z0.d
+// CHECK-INST: addva za0.d, p0/m, p0/m, z0.d
+// CHECK-ENCODING: [0x00,0x00,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 00 00 d1 c0
+
+addva za5.d, p5/m, p2/m, z10.d
+// CHECK-INST: addva za5.d, p5/m, p2/m, z10.d
+// CHECK-ENCODING: [0x45,0x55,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 45 55 d1 c0
+
+addva za7.d, p3/m, p7/m, z13.d
+// CHECK-INST: addva za7.d, p3/m, p7/m, z13.d
+// CHECK-ENCODING: [0xa7,0xed,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: a7 ed d1 c0
+
+addva za7.d, p7/m, p7/m, z31.d
+// CHECK-INST: addva za7.d, p7/m, p7/m, z31.d
+// CHECK-ENCODING: [0xe7,0xff,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: e7 ff d1 c0
+
+addva za5.d, p3/m, p0/m, z17.d
+// CHECK-INST: addva za5.d, p3/m, p0/m, z17.d
+// CHECK-ENCODING: [0x25,0x0e,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 25 0e d1 c0
+
+addva za1.d, p1/m, p4/m, z1.d
+// CHECK-INST: addva za1.d, p1/m, p4/m, z1.d
+// CHECK-ENCODING: [0x21,0x84,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 84 d1 c0
+
+addva za0.d, p5/m, p2/m, z19.d
+// CHECK-INST: addva za0.d, p5/m, p2/m, z19.d
+// CHECK-ENCODING: [0x60,0x56,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 60 56 d1 c0
+
+addva za0.d, p6/m, p0/m, z12.d
+// CHECK-INST: addva za0.d, p6/m, p0/m, z12.d
+// CHECK-ENCODING: [0x80,0x19,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 80 19 d1 c0
+
+addva za1.d, p2/m, p6/m, z1.d
+// CHECK-INST: addva za1.d, p2/m, p6/m, z1.d
+// CHECK-ENCODING: [0x21,0xc8,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 21 c8 d1 c0
+
+addva za5.d, p2/m, p0/m, z22.d
+// CHECK-INST: addva za5.d, p2/m, p0/m, z22.d
+// CHECK-ENCODING: [0xc5,0x0a,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: c5 0a d1 c0
+
+addva za2.d, p5/m, p7/m, z9.d
+// CHECK-INST: addva za2.d, p5/m, p7/m, z9.d
+// CHECK-ENCODING: [0x22,0xf5,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 22 f5 d1 c0
+
+addva za7.d, p2/m, p5/m, z12.d
+// CHECK-INST: addva za7.d, p2/m, p5/m, z12.d
+// CHECK-ENCODING: [0x87,0xa9,0xd1,0xc0]
+// CHECK-ERROR: instruction requires: sme-i64
+// CHECK-UNKNOWN: 87 a9 d1 c0
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -20,4 +20,5 @@
 add_llvm_target_unittest(AArch64Tests
   InstSizes.cpp
   DecomposeStackOffsetTest.cpp
+  MatrixRegisterAliasing.cpp
   )
diff --git a/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp b/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp
@@ -0,0 +1,135 @@
+#include "AArch64Subtarget.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
+  auto TT(Triple::normalize("aarch64--"));
+  std::string CPU("generic");
+  std::string FS("+sme");
+
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
+
+  return std::unique_ptr<LLVMTargetMachine>(
+      static_cast<LLVMTargetMachine *>(TheTarget->createTargetMachine(
+          TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default)));
+}
+
+std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
+  AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                      std::string(TM->getTargetFeatureString()), *TM,
+                      /* isLittle */ false);
+  return std::make_unique<AArch64InstrInfo>(ST);
+}
+
+TEST(MatrixRegisterAliasing, Aliasing) {
+  std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine();
+  ASSERT_TRUE(TM);
+  std::unique_ptr<AArch64InstrInfo> II = createInstrInfo(TM.get());
+
+  const AArch64RegisterInfo &TRI = II->getRegisterInfo();
+
+  // za overlaps with za.b
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZA, AArch64::ZAB0));
+
+  // za0.b overlaps with all tiles
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAQ15));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAD7));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAS3));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAB0, AArch64::ZAH1));
+
+  // za0.h aliases with za0.q, za2.q, ..
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ0));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ2));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ4));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ6));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ8));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ10));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ12));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ14));
+
+  // za1.h aliases with za1.q, za3.q, ...
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ1));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ3));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ5));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ7));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ9));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ11));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ13));
+  ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ15));
+
+  // za1.h doesn't alias with za0.q, za2.q, ..
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ0));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ2));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ4));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ6));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ8));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ10));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ12));
+  ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH1, AArch64::ZAQ14));
+
+  // za0.h doesn't alias with za1.q, za3.q, ..
+ ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ3)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ5)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ7)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ9)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ11)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ13)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAH0, AArch64::ZAQ15)); + + // za0.s aliases with za0.q, za4.q, za8.q, za12.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ4)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ8)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ12)); + + // za1.s aliases with za1.q, za5.q, za9.q, za13.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ1)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ5)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ9)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ13)); + + // za0.s doesn't alias with za1.q, za5.q, za9.q, za13.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ5)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ9)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS0, AArch64::ZAQ13)); + + // za1.s doesn't alias with za0.q, za4.q, za8.q, za12.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ0)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ4)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ8)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAS1, AArch64::ZAQ12)); + + // za0.d aliases za0.q and za8.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ0)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ8)); + + // za1.d aliases za1.q and za9.q + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ1)); + ASSERT_TRUE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ9)); + + // za0.d doesn't alias with za1.q and za9.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ1)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD0, AArch64::ZAQ9)); + + // za1.d doesn't alias with za0.q and za8.q + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ0)); + ASSERT_FALSE(TRI.regsOverlap(AArch64::ZAD1, AArch64::ZAQ8)); +} + +} // end anonymous namespace
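Note (reviewer illustration, not part of the patch): the SubRegIndices in AArch64RegisterInfo.td interleave the tiles so that tile za<t> with an element size of E bytes is made up of every E-th 128-bit tile starting at ZAQ<t> (ZAD0 = {ZAQ0, ZAQ8}, ZAS1 = {ZAQ1, ZAQ5, ZAQ9, ZAQ13}, ZAH1 = the odd ZAQ tiles). The standalone C++ sketch below only restates that rule and checks it against a few of the expectations in MatrixRegisterAliasing.cpp; the helper name coveredZAQTiles is made up for the example and is not an LLVM API.

// Standalone sketch: which 128-bit tiles ZAQ<q> a wider tile za<t> covers,
// assuming the interleaving implied by the SubRegIndices above.
#include <cassert>
#include <vector>

std::vector<unsigned> coveredZAQTiles(unsigned Tile, unsigned EltBytes) {
  // EltBytes is 1 (.b), 2 (.h), 4 (.s), 8 (.d) or 16 (.q); Tile < EltBytes.
  std::vector<unsigned> Qs;
  for (unsigned Q = Tile; Q < 16; Q += EltBytes)
    Qs.push_back(Q);
  return Qs;
}

int main() {
  // za0.d covers ZAQ0 and ZAQ8, matching the ZAD0 definition and unit test.
  assert(coveredZAQTiles(0, 8) == (std::vector<unsigned>{0, 8}));
  // za1.s covers ZAQ1, ZAQ5, ZAQ9, ZAQ13.
  assert(coveredZAQTiles(1, 4) == (std::vector<unsigned>{1, 5, 9, 13}));
  // za1.h covers the odd-numbered ZAQ tiles.
  assert(coveredZAQTiles(1, 2) ==
         (std::vector<unsigned>{1, 3, 5, 7, 9, 11, 13, 15}));
  return 0;
}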
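Note (reviewer illustration, not part of the patch): the fixed fields in sme_add_vector_to_tile_inst can be cross-checked against the encodings in the MC tests. A minimal sketch, assuming the bit layout shown in SMEInstrFormats.td; encodeAddToTile is a made-up name and is not LLVM's MC emitter.

// Standalone sketch of the ADDHA/ADDVA encoding: Op selects the 32-bit (0) or
// 64-bit (1) element form, V selects addha (0) or addva (1).
#include <cassert>
#include <cstdint>

uint32_t encodeAddToTile(unsigned Op, unsigned V, unsigned ZAda, unsigned Pn,
                         unsigned Pm, unsigned Zn) {
  uint32_t Inst = 0;
  Inst |= 0b110000001u << 23; // Inst{31-23}
  Inst |= (Op & 1) << 22;     // Inst{22}
  Inst |= 0b01000u << 17;     // Inst{21-17}
  Inst |= (V & 1) << 16;      // Inst{16}
  Inst |= (Pm & 0x7) << 13;   // Inst{15-13}
  Inst |= (Pn & 0x7) << 10;   // Inst{12-10}
  Inst |= (Zn & 0x1f) << 5;   // Inst{9-5}
  Inst |= ZAda & 0x7;         // Inst{2-0}; Inst{2} stays 0 for the .s form
  return Inst;
}

int main() {
  // addha za1.s, p5/m, p2/m, z10.s -> [0x41,0x55,0x90,0xc0] in addha-u32.s.
  assert(encodeAddToTile(0, 0, 1, 5, 2, 10) == 0xc0905541);
  // addva za7.d, p2/m, p5/m, z12.d -> [0x87,0xa9,0xd1,0xc0] in addva-u64.s.
  assert(encodeAddToTile(1, 1, 7, 2, 5, 12) == 0xc0d1a987);
  return 0;
}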
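Note (reviewer illustration, not part of the patch): a minimal sketch of how a tile-vector operand name such as "za1v.h" decomposes, in the spirit of tryParseMatrixRegister and printMatrixTileVector. The 'h'/'v' before the dot selects Row/Col, the suffix selects the element width, and the register itself is just the tile (ZAH1 here). splitMatrixName is a made-up helper; the real parser additionally validates the name against matchMatrixRegName and parseVectorKind.

#include <cassert>
#include <string>

struct MatrixOperandParts {
  std::string Tile;   // e.g. "za1.h", i.e. register ZAH1
  char RowOrCol;      // 'h' (Row), 'v' (Col), or 0 for a plain tile
  std::string Suffix; // ".b", ".h", ".s", ".d" or ".q"
};

MatrixOperandParts splitMatrixName(const std::string &Name) {
  size_t Dot = Name.find('.');
  std::string Head = Name.substr(0, Dot);
  std::string Suffix = Name.substr(Dot);
  char Last = Head.back();
  // Plain tile names end in a digit before the dot, so a trailing 'h'/'v'
  // unambiguously marks a horizontal/vertical tile vector.
  if (Last == 'h' || Last == 'v')
    return {Head.substr(0, Head.size() - 1) + Suffix, Last, Suffix};
  return {Head, 0, Suffix};
}

int main() {
  MatrixOperandParts P = splitMatrixName("za1v.h");
  assert(P.Tile == "za1.h" && P.RowOrCol == 'v' && P.Suffix == ".h");
  return 0;
}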