Index: lib/Target/ARM/ARM.td =================================================================== --- lib/Target/ARM/ARM.td +++ lib/Target/ARM/ARM.td @@ -114,6 +114,9 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", + "Enable support for dot product instructions", + [FeatureNEON]>; // Not to be confused with FeatureHasRetAddrStack (return address stack) def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -259,6 +259,8 @@ AssemblerPredicate<"FeatureNEON", "NEON">; def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasDotProd : Predicate<"Subtarget->hasDotProd()">, + AssemblerPredicate<"FeatureDotProd", "dotprod">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; def HasRAS : Predicate<"Subtarget->hasRAS()">, Index: lib/Target/ARM/ARMInstrNEON.td =================================================================== --- lib/Target/ARM/ARMInstrNEON.td +++ lib/Target/ARM/ARMInstrNEON.td @@ -4672,6 +4672,42 @@ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasVFP4]>; +// ARMv8.2a dot product instructions. +// We put them in the VFPV8 decoder namespace because the ARM and Thumb +// encodings are the same and thus no further bit twiddling is necessary +// in the disassembler. 
+let Predicates = [HasDotProd], DecoderNamespace = "VFPV8" in {
+
+def VUDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b1,
+                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
+def VSDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b0,
+                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
+def VUDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b1,
+                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
+def VSDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b0,
+                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
+
+// Indexed dot product instructions (Ty selects the D or Q form of Vd/Vn):
+class DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty> :
+  N3Vnp<0b11100, 0b10, 0b1101, Q, U,
+        (outs Ty:$Vd), (ins Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+        N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
+  bit lane;
+  let Inst{5} = lane;  // Bit 5 holds the lane, so $Vm is limited to d0-d15.
+  let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
+}
+
+def VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR>;
+def VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR>;
+def VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR>;
+def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>;
+
+} // HasDotProd
+
 // Vector Subtract Operations.
 
 // VSUB : Vector Subtract (integer and floating-point)
Index: lib/Target/ARM/ARMSchedule.td
===================================================================
--- lib/Target/ARM/ARMSchedule.td
+++ lib/Target/ARM/ARMSchedule.td
@@ -414,6 +414,7 @@
 def IIC_VTBX2 : InstrItinClass;
 def IIC_VTBX3 : InstrItinClass;
 def IIC_VTBX4 : InstrItinClass;
+def IIC_VDOTPROD : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
 // Processor instruction itineraries.
Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -156,6 +156,9 @@ bool HasFPARMv8 = false; bool HasNEON = false; + /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. + bool HasDotProd = false; + /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to /// determine if NEON should actually be used. @@ -521,6 +524,7 @@ bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } + bool hasDotProd() const { return HasDotProd; } bool hasCRC() const { return HasCRC; } bool hasRAS() const { return HasRAS; } bool hasVirtualization() const { return HasVirtualization; } Index: lib/Target/ARM/AsmParser/ARMAsmParser.cpp =================================================================== --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5348,7 +5348,8 @@ Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || - Mnemonic == "bxns" || Mnemonic == "blxns") + Mnemonic == "bxns" || Mnemonic == "blxns" || + Mnemonic == "vudot" || Mnemonic == "vsdot") return Mnemonic; // First, split out any predication code. 
Ignore mnemonics we know aren't @@ -5454,7 +5455,8 @@ Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || - Mnemonic == "vmovx" || Mnemonic == "vins") { + Mnemonic == "vmovx" || Mnemonic == "vins" || + Mnemonic == "vudot" || Mnemonic == "vsdot") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { Index: test/MC/ARM/armv8.2a-dotprod-a32.s =================================================================== --- /dev/null +++ test/MC/ARM/armv8.2a-dotprod-a32.s @@ -0,0 +1,37 @@ +// RUN: llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK + +// RUN: not llvm-mc -triple arm -mattr=-dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s +// RUN: not llvm-mc -triple arm -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s +// RUN: not llvm-mc -triple arm -mattr=+v8.1a -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s +// RUN: not llvm-mc -triple arm -mattr=+v8.2a -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s + +vudot.u8 d0, d1, d2 +vsdot.s8 d0, d1, d2 +vudot.u8 q0, q1, q4 +vsdot.s8 q0, q1, q4 +vudot.u8 d0, d1, d2[0] +vsdot.s8 d0, d1, d2[1] +vudot.u8 q0, q1, d4[0] +vsdot.s8 q0, q1, d4[1] + +// CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x12,0x0d,0x21,0xfc] +// CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x02,0x0d,0x21,0xfc] +// CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x58,0x0d,0x22,0xfc] +// CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x48,0x0d,0x22,0xfc] +// CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x12,0x0d,0x21,0xfe] +// CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x22,0x0d,0x21,0xfe] +// CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x54,0x0d,0x22,0xfe] +// CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: 
[0x64,0x0d,0x22,0xfe] + +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod Index: test/MC/ARM/armv8.2a-dotprod-error.s =================================================================== --- /dev/null +++ test/MC/ARM/armv8.2a-dotprod-error.s @@ -0,0 +1,14 @@ +// RUN: not llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s + +vudot.u8 d0, d1, d2[2] +vsdot.s8 d0, d1, d2[2] +vudot.u8 q0, q1, d4[2] +vsdot.s8 q0, q1, d4[2] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction Index: test/MC/ARM/armv8.2a-dotprod-t32.s =================================================================== --- /dev/null +++ test/MC/ARM/armv8.2a-dotprod-t32.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK + +// RUN: not llvm-mc -triple thumb -mattr=-dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -mattr=+v8.1a -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -mattr=+v8.2a -show-encoding < %s 2> %t 
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s + + vudot.u8 d0, d1, d2 + vsdot.s8 d0, d1, d2 + vudot.u8 q0, q1, q4 + vsdot.s8 q0, q1, q4 + vudot.u8 d0, d1, d2[0] + vsdot.s8 d0, d1, d2[1] + vudot.u8 q0, q1, d4[0] + vsdot.s8 q0, q1, d4[1] + +//CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x21,0xfc,0x12,0x0d] +//CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x21,0xfc,0x02,0x0d] +//CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x22,0xfc,0x58,0x0d] +//CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x22,0xfc,0x48,0x0d] +//CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x21,0xfe,0x12,0x0d] +//CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x21,0xfe,0x22,0x0d] +//CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x22,0xfe,0x54,0x0d] +//CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: [0x22,0xfe,0x64,0x0d] + +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +