diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -511,6 +511,7 @@ def SImm4s3Operand : SImmScaledMemoryIndexed<4, 3>; def SImm4s4Operand : SImmScaledMemoryIndexed<4, 4>; def SImm4s16Operand : SImmScaledMemoryIndexed<4, 16>; +def SImm4s32Operand : SImmScaledMemoryIndexed<4, 32>; def simm4s1 : Operand, ImmLeaf=-8 && Imm <= 7; }]> { @@ -544,6 +545,12 @@ let ParserMatchClass = SImm4s16Operand; let DecoderMethod = "DecodeSImm<4>"; } +def simm4s32 : Operand, ImmLeaf=-256 && Imm <= 224 && (Imm % 32) == 0x0; }]> { + let PrintMethod = "printImmScale<32>"; + let ParserMatchClass = SImm4s32Operand; + let DecoderMethod = "DecodeSImm<4>"; +} def Imm1_8Operand : AsmImmRange<1, 8>; def Imm1_16Operand : AsmImmRange<1, 16>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1769,6 +1769,37 @@ } +let Predicates = [HasSVE, HasMatMulInt8] in { + def SMMLA_ZZZ : sve_int_matmul<0b00, "smmla">; + def UMMLA_ZZZ : sve_int_matmul<0b11, "ummla">; + def USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla">; + def USDOT_ZZZ : sve_int_dot_mixed<"usdot">; + def USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot">; + def SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot">; +} + +let Predicates = [HasSVE, HasMatMulFP32] in { + def FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32>; +} + +let Predicates = [HasSVE, HasMatMulFP64] in { + def FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64>; + defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8>; + defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16>; + defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32>; + defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64>; + defm LD1RO_B : 
sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64>; + def ZIP1_ZZZ_128 : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1">; + def ZIP2_ZZZ_128 : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2">; + def UZP1_ZZZ_128 : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1">; + def UZP2_ZZZ_128 : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2">; + def TRN1_ZZZ_128 : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1">; + def TRN2_ZZZ_128 : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2">; +} + let Predicates = [HasSVE2] in { // SVE2 integer multiply-add (indexed) defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -4256,6 +4256,8 @@ return Error(Loc, "index must be a multiple of 4 in range [-32, 28]."); case Match_InvalidMemoryIndexed16SImm4: return Error(Loc, "index must be a multiple of 16 in range [-128, 112]."); + case Match_InvalidMemoryIndexed32SImm4: + return Error(Loc, "index must be a multiple of 32 in range [-256, 224]."); case Match_InvalidMemoryIndexed1SImm6: return Error(Loc, "index must be an integer in range [-32, 31]."); case Match_InvalidMemoryIndexedSImm8: @@ -4915,6 +4917,7 @@ case Match_InvalidMemoryIndexed4SImm4: case Match_InvalidMemoryIndexed1SImm6: case Match_InvalidMemoryIndexed16SImm4: + case Match_InvalidMemoryIndexed32SImm4: case Match_InvalidMemoryIndexed4SImm7: case Match_InvalidMemoryIndexed8SImm7: case Match_InvalidMemoryIndexed16SImm7: diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -7548,6 +7548,181 @@ let ElementSize = ElementSizeS; } +//===----------------------------------------------------------------------===// +// SVE Integer Matrix Multiply Group +//===----------------------------------------------------------------------===// + +class sve_int_matmul uns, string asm> +: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR8:$Zm), asm, + "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = uns; + let Inst{21} = 0; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b100110; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ZPR32.ElementSize; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Dot Product Mixed Sign Group +//===----------------------------------------------------------------------===// + +class sve_int_dot_mixed +: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR8:$Zm), asm, + "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<5> Zm; + let Inst{31-21} = 0b01000100100; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b011110; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ZPR32.ElementSize; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Dot Product Mixed Sign - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_int_dot_mixed_indexed +: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS:$idx), + asm, "\t$Zda, $Zn, $Zm$idx", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<3> Zm; + bits<2> 
idx; + let Inst{31-21} = 0b01000100101; + let Inst{20-19} = idx; + let Inst{18-16} = Zm; + let Inst{15-11} = 0b00011; + let Inst{10} = U; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ZPR32.ElementSize; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Matrix Multiply Accumulate Group +//===----------------------------------------------------------------------===// + +class sve_fp_matrix_mla +: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty:$Zm), + asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<5> Zm; + let Inst{31-23} = 0b011001001; + let Inst{22} = sz; + let Inst{21} = 1; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b111001; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = DestructiveOther; + let ElementSize = zprty.ElementSize; +} + +//===----------------------------------------------------------------------===// +// SVE Memory - Contiguous Load And Replicate 256-bit Group +//===----------------------------------------------------------------------===// + +class sve_mem_ldor_si sz, string asm, RegisterOperand VecList> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> { + bits<5> Zt; + bits<5> Rn; + bits<3> Pg; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-20} = 0b010; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ldor_si sz, string asm, RegisterOperand listty, + ZPRRegOp zprty> { + def NAME : sve_mem_ldor_si; + def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : 
InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4), 0>; +} + +class sve_mem_ldor_ss sz, string asm, RegisterOperand VecList, + RegisterOperand gprty> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rn; + bits<5> Rm; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-21} = 0b01; + let Inst{20-16} = Rm; + let Inst{15-13} = 0; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ldor_ss sz, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def NAME : sve_mem_ldor_ss; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +//===----------------------------------------------------------------------===// +// SVE Interleave 128-bit Elements Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_bin_perm_128_zz opc, bit P, string asm> +: I<(outs ZPR128:$Zd), (ins ZPR128:$Zn, ZPR128:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-21} = 0b00000101101; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b000; + let Inst{12-11} = opc; + let Inst{10} = P; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + + /// Addressing modes def am_sve_indexed_s4 :ComplexPattern", [], [SDNPWantRoot]>; def am_sve_indexed_s6 :ComplexPattern", [], [SDNPWantRoot]>; diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s @@ -0,0 +1,86 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+f32mm,+f64mm 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// FMMLA (SVE) + +// 
Invalid element size + +fmmla z0.h, z1.h, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width + +// Mis-matched element size + +fmmla z0.d, z1.s, z2.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +fmmla z0.s, z1.d, z2.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +fmmla z0.s, z1.s, z2.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width + + +// --------------------------------------------------------------------------// +// LD1RO (SVE, scalar plus immediate) + +// Immediate too high (>224) +ld1rob { z0.b }, p1/z, [x2, #256] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1roh { z0.h }, p1/z, [x2, #256] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1row { z0.s }, p1/z, [x2, #256] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1rod { z0.d }, p1/z, [x2, #256] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. + +// Immediate too low (<-256) +ld1rob { z0.b }, p1/z, [x2, #-288] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1roh { z0.h }, p1/z, [x2, #-288] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1row { z0.s }, p1/z, [x2, #-288] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1rod { z0.d }, p1/z, [x2, #-288] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. + +// Immediate not a multiple of 32 +ld1rob { z0.b }, p1/z, [x2, #16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1roh { z0.h }, p1/z, [x2, #16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. 
+ld1row { z0.s }, p1/z, [x2, #16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. +ld1rod { z0.d }, p1/z, [x2, #16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 32 in range [-256, 224]. + +// Predicate register too high +ld1rob { z0.b }, p8/z, [x2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +ld1roh { z0.h }, p8/z, [x2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +ld1row { z0.s }, p8/z, [x2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +ld1rod { z0.d }, p8/z, [x2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) + + +// --------------------------------------------------------------------------// +// LD1RO (SVE, scalar plus scalar) + +// Shift amount not matched to data width +ld1rob { z0.b }, p1/z, [x2, x3, lsl #1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 without shift +ld1roh { z0.h }, p1/z, [x2, x3, lsl #0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 with required shift 'lsl #1' +ld1row { z0.s }, p1/z, [x2, x3, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 with required shift 'lsl #2' +ld1rod { z0.d }, p1/z, [x2, x3, lsl #2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: register must be x0..x30 with required shift 'lsl #3' + +// Predicate register too high +ld1rob { z0.b }, p8/z, [x2, x3, lsl #0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +ld1roh { z0.h }, p8/z, [x2, x3, lsl #1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +ld1row { z0.s }, p8/z, [x2, x3, lsl #2] +// 
CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +ld1rod { z0.d }, p8/z, [x2, x3, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp32.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp32.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp32.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+f32mm < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+i8mm,+f64mm < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+f32mm < %s \ +// RUN: | llvm-objdump -d --mattr=+sve,+f32mm - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+f32mm < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +// --------------------------------------------------------------------------// +// FMMLA (SVE) + +fmmla z0.s, z1.s, z2.s +// CHECK-INST: fmmla z0.s, z1.s, z2.s +// CHECK-ENCODING: [0x20,0xe4,0xa2,0x64] +// CHECK-ERROR: instruction requires: f32mm +// CHECK-UNKNOWN: 20 e4 a2 64 diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp64.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp64.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp64.s @@ -0,0 +1,281 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+f64mm < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+i8mm,+f32mm < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+f64mm < %s \ +// RUN: | llvm-objdump -d --mattr=+sve,+f64mm - | 
FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+f64mm < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +// --------------------------------------------------------------------------// +// FMMLA (SVE) + +fmmla z0.d, z1.d, z2.d +// CHECK-INST: fmmla z0.d, z1.d, z2.d +// CHECK-ENCODING: [0x20,0xe4,0xe2,0x64] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 20 e4 e2 64 + +// --------------------------------------------------------------------------// +// LD1RO (SVE, scalar plus immediate) + +// With maximum immediate (224) + +ld1rob { z0.b }, p1/z, [x2, #224] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0x27,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 27 a4 + +ld1roh { z0.h }, p1/z, [x2, #224] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0xa7,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a7 a4 + +ld1row { z0.s }, p1/z, [x2, #224] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0x27,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 27 a5 + +ld1rod { z0.d }, p1/z, [x2, #224] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0xa7,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a7 a5 + +// With minimum immediate (-256) + +ld1rob { z0.b }, p1/z, [x2, #-256] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2, #-256] +// CHECK-ENCODING: [0x40,0x24,0x28,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 28 a4 + +ld1roh { z0.h }, p1/z, [x2, #-256] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2, #-256] +// CHECK-ENCODING: [0x40,0x24,0xa8,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a8 a4 + +ld1row { z0.s }, p1/z, [x2, #-256] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2, #-256] +// 
CHECK-ENCODING: [0x40,0x24,0x28,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 28 a5 + +ld1rod { z0.d }, p1/z, [x2, #-256] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2, #-256] +// CHECK-ENCODING: [0x40,0x24,0xa8,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a8 a5 + +// Aliases with a vector first operand, and omitted offset. + +ld1rob { z0.b }, p1/z, [x2] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0x20,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 20 a4 + +ld1roh { z0.h }, p1/z, [x2] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0xa0,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a0 a4 + +ld1row { z0.s }, p1/z, [x2] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0x20,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 20 a5 + +ld1rod { z0.d }, p1/z, [x2] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0xa0,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a0 a5 + +// Aliases with a plain (non-list) first operand, and omitted offset. 
+ +ld1rob z0.b, p1/z, [x2] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0x20,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 20 a4 + +ld1roh z0.h, p1/z, [x2] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0xa0,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a0 a4 + +ld1row z0.s, p1/z, [x2] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0x20,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 20 a5 + +ld1rod z0.d, p1/z, [x2] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2] +// CHECK-ENCODING: [0x40,0x24,0xa0,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a0 a5 + +// Aliases with a plain (non-list) first operand, plus offset. + +// With maximum immediate (224) + +ld1rob z0.b, p1/z, [x2, #224] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0x27,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 27 a4 + +ld1roh z0.h, p1/z, [x2, #224] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0xa7,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a7 a4 + +ld1row z0.s, p1/z, [x2, #224] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0x27,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 27 a5 + +ld1rod z0.d, p1/z, [x2, #224] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2, #224] +// CHECK-ENCODING: [0x40,0x24,0xa7,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a7 a5 + +// With minimum immediate (-256) + +ld1rob z0.b, p1/z, [x2, #-256] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2, #-256] +// CHECK-ENCODING: [0x40,0x24,0x28,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 28 a4 + +ld1roh z0.h, p1/z, [x2, #-256] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2, #-256] +// 
CHECK-ENCODING: [0x40,0x24,0xa8,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a8 a4 + +ld1row z0.s, p1/z, [x2, #-256] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2, #-256] +// CHECK-ENCODING: [0x40,0x24,0x28,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 28 a5 + +ld1rod z0.d, p1/z, [x2, #-256] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2, #-256] +// CHECK-ENCODING: [0x40,0x24,0xa8,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 24 a8 a5 + + +// --------------------------------------------------------------------------// +// LD1RO (SVE, scalar plus scalar) + +ld1rob { z0.b }, p1/z, [x2, x3, lsl #0] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2, x3] +// CHECK-ENCODING: [0x40,0x04,0x23,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 23 a4 + +ld1roh { z0.h }, p1/z, [x2, x3, lsl #1] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2, x3, lsl #1] +// CHECK-ENCODING: [0x40,0x04,0xa3,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 a3 a4 + +ld1row { z0.s }, p1/z, [x2, x3, lsl #2] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2, x3, lsl #2] +// CHECK-ENCODING: [0x40,0x04,0x23,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 23 a5 + +ld1rod { z0.d }, p1/z, [x2, x3, lsl #3] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2, x3, lsl #3] +// CHECK-ENCODING: [0x40,0x04,0xa3,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 a3 a5 + +// Aliases with a plain (non-list) first operand, and omitted shift for the +// byte variant. 
+ +ld1rob z0.b, p1/z, [x2, x3] +// CHECK-INST: ld1rob { z0.b }, p1/z, [x2, x3] +// CHECK-ENCODING: [0x40,0x04,0x23,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 23 a4 + +ld1roh z0.h, p1/z, [x2, x3, lsl #1] +// CHECK-INST: ld1roh { z0.h }, p1/z, [x2, x3, lsl #1] +// CHECK-ENCODING: [0x40,0x04,0xa3,0xa4] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 a3 a4 + +ld1row z0.s, p1/z, [x2, x3, lsl #2] +// CHECK-INST: ld1row { z0.s }, p1/z, [x2, x3, lsl #2] +// CHECK-ENCODING: [0x40,0x04,0x23,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 23 a5 + +ld1rod z0.d, p1/z, [x2, x3, lsl #3] +// CHECK-INST: ld1rod { z0.d }, p1/z, [x2, x3, lsl #3] +// CHECK-ENCODING: [0x40,0x04,0xa3,0xa5] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 40 04 a3 a5 + + +// --------------------------------------------------------------------------// +// ZIP1, ZIP2 (SVE, 128-bit element) + +zip1 z0.q, z1.q, z2.q +// CHECK-INST: zip1 z0.q, z1.q, z2.q +// CHECK-ENCODING: [0x20,0x00,0xa2,0x05] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 20 00 a2 05 + +zip2 z0.q, z1.q, z2.q +// CHECK-INST: zip2 z0.q, z1.q, z2.q +// CHECK-ENCODING: [0x20,0x04,0xa2,0x05] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 20 04 a2 05 + + +// --------------------------------------------------------------------------// +// UZP1, UZP2 (SVE, 128-bit element) + +uzp1 z0.q, z1.q, z2.q +// CHECK-INST: uzp1 z0.q, z1.q, z2.q +// CHECK-ENCODING: [0x20,0x08,0xa2,0x05] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 20 08 a2 05 + +uzp2 z0.q, z1.q, z2.q +// CHECK-INST: uzp2 z0.q, z1.q, z2.q +// CHECK-ENCODING: [0x20,0x0c,0xa2,0x05] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 20 0c a2 05 + + +// --------------------------------------------------------------------------// +// TRN1, TRN2 (SVE, 128-bit element) + +trn1 z0.q, z1.q, z2.q +// CHECK-INST: trn1 z0.q, z1.q, z2.q +// 
CHECK-ENCODING: [0x20,0x18,0xa2,0x05] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 20 18 a2 05 + +trn2 z0.q, z1.q, z2.q +// CHECK-INST: trn2 z0.q, z1.q, z2.q +// CHECK-ENCODING: [0x20,0x1c,0xa2,0x05] +// CHECK-ERROR: instruction requires: f64mm +// CHECK-UNKNOWN: 20 1c a2 05 diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-int8-diagnostics.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-int8-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-int8-diagnostics.s @@ -0,0 +1,78 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+i8mm 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// SMMLA, UMMLA, USMMLA (SVE) + +// Invalid element size + +ummla z0.h, z1.b, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +ummla z0.s, z1.h, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +ummla z0.s, z1.b, z2.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width + +// Negative tests for instructions that are incompatible with predicated movprfx + +movprfx z0.d, p0/z, z7.d +ummla z0.s, z1.b, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +movprfx z0.d, p0/z, z7.d +smmla z0.s, z1.b, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +movprfx z0.d, p0/z, z7.d +usmmla z0.s, z1.b, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx + + +// --------------------------------------------------------------------------// +// USDOT (SVE, vectors) + +// Invalid element size + +usdot z0.d, z1.b, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +usdot z0.s, z1.s, z2.b +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid element width +usdot z0.s, z1.b, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z7.b + +// Negative tests for instructions that are incompatible with predicated movprfx + +movprfx z0.d, p0/z, z7.d +usdot z0.s, z1.b, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx + + +// --------------------------------------------------------------------------// +// USDOT, SUDOT (SVE, indexed) + +// Invalid element size + +usdot z0.h, z1.b, z2.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +sudot z0.s, z1.h, z2.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +usdot z0.s, z1.b, z2.s[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z7.b + +// Invalid restricted register for indexed vector. +usdot z0.s, z1.b, z9.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +sudot z0.s, z1.b, z9.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z7.b + +// Invalid element index +usdot z0.s, z1.b, z2.b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +sudot z0.s, z1.b, z2.b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+ +// Negative tests for instructions that are incompatible with predicated movprfx + +movprfx z0.d, p0/z, z7.d +usdot z0.s, z1.b, z2.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +movprfx z0.d, p0/z, z7.d +sudot z0.s, z1.b, z2.b[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-int8.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-int8.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-int8.s @@ -0,0 +1,129 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+i8mm < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+i8mm < %s \ +// RUN: | llvm-objdump -d --mattr=+sve,+i8mm - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+i8mm < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + + +// --------------------------------------------------------------------------// +// SMMLA, UMMLA, USMMLA (SVE) + +ummla z0.s, z1.b, z2.b +// CHECK-INST: ummla z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x98,0xc2,0x45] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 98 c2 45 + +smmla z0.s, z1.b, z2.b +// CHECK-INST: smmla z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x98,0x02,0x45] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 98 02 45 + +usmmla z0.s, z1.b, z2.b +// CHECK-INST: usmmla z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x98,0x82,0x45] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 98 82 45 + + +// Test compatibility with MOVPRFX instruction. 
+ +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-UNKNOWN: e0 bc 20 04 + +ummla z0.s, z1.b, z2.b +// CHECK-INST: ummla z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x98,0xc2,0x45] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 98 c2 45 + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-UNKNOWN: e0 bc 20 04 + +smmla z0.s, z1.b, z2.b +// CHECK-INST: smmla z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x98,0x02,0x45] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 98 02 45 + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-UNKNOWN: e0 bc 20 04 + +usmmla z0.s, z1.b, z2.b +// CHECK-INST: usmmla z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x98,0x82,0x45] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 98 82 45 + + +// --------------------------------------------------------------------------// +// USDOT (SVE, vectors) + +usdot z0.s, z1.b, z2.b +// CHECK-INST: usdot z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x78,0x82,0x44] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 78 82 44 + +// Test compatibility with MOVPRFX instruction. 
+ +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-UNKNOWN: e0 bc 20 04 + +usdot z0.s, z1.b, z2.b +// CHECK-INST: usdot z0.s, z1.b, z2.b +// CHECK-ENCODING: [0x20,0x78,0x82,0x44] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 78 82 44 + + +// --------------------------------------------------------------------------// +// USDOT, SUDOT (SVE, indexed) + +usdot z0.s, z1.b, z2.b[0] +// CHECK-INST: usdot z0.s, z1.b, z2.b[0] +// CHECK-ENCODING: [0x20,0x18,0xa2,0x44] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 18 a2 44 + +sudot z0.s, z1.b, z2.b[3] +// CHECK-INST: sudot z0.s, z1.b, z2.b[3] +// CHECK-ENCODING: [0x20,0x1c,0xba,0x44] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 1c ba 44 + +// Test compatibility with MOVPRFX instruction. + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-UNKNOWN: e0 bc 20 04 + +usdot z0.s, z1.b, z2.b[0] +// CHECK-INST: usdot z0.s, z1.b, z2.b[0] +// CHECK-ENCODING: [0x20,0x18,0xa2,0x44] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 18 a2 44 + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-UNKNOWN: e0 bc 20 04 + +sudot z0.s, z1.b, z2.b[0] +// CHECK-INST: sudot z0.s, z1.b, z2.b[0] +// CHECK-ENCODING: [0x20,0x1c,0xa2,0x44] +// CHECK-ERROR: instruction requires: i8mm +// CHECK-UNKNOWN: 20 1c a2 44