Index: llvm/trunk/lib/Target/ARM/ARM.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARM.td +++ llvm/trunk/lib/Target/ARM/ARM.td @@ -114,6 +114,9 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", + "Enable support for dot product instructions", + [FeatureNEON]>; // Not to be confused with FeatureHasRetAddrStack (return address stack) def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td @@ -259,6 +259,8 @@ AssemblerPredicate<"FeatureNEON", "NEON">; def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasDotProd : Predicate<"Subtarget->hasDotProd()">, + AssemblerPredicate<"FeatureDotProd", "dotprod">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; def HasRAS : Predicate<"Subtarget->hasRAS()">, @@ -5037,6 +5039,8 @@ let Inst{15-12} = CRd; let Inst{19-16} = CRn; let Inst{23-20} = opc1; + + let DecoderNamespace = "CoProc"; } def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, @@ -5060,6 +5064,8 @@ let Inst{15-12} = CRd; let Inst{19-16} = CRn; let Inst{23-20} = opc1; + + let DecoderNamespace = "CoProc"; } class ACI pattern> { def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), asm, "\t$cop, $CRd, $addr", pattern> { @@ -5228,6 +5236,8 @@ defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +} // DecoderNamespace = "CoProc" + 
//===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register. // @@ -5252,6 +5262,8 @@ let Inst{7-5} = opc2; let Inst{3-0} = CRm; let Inst{19-16} = CRn; + + let DecoderNamespace = "CoProc"; } def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, @@ -5296,6 +5308,8 @@ let Inst{7-5} = opc2; let Inst{3-0} = CRm; let Inst{19-16} = CRn; + + let DecoderNamespace = "CoProc"; } def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, Index: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td @@ -4672,6 +4672,42 @@ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasVFP4]>; +// ARMv8.2a dot product instructions. +// We put them in the VFPV8 decoder namespace because the ARM and Thumb +// encodings are the same and thus no further bit twiddling is necessary +// in the disassembler. 
+let Predicates = [HasDotProd], DecoderNamespace = "VFPV8" in {
+
+def VUDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b1,
+                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
+def VSDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b0,
+                   (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
+def VUDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b1,
+                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>;
+def VSDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b0,
+                   (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
+                   N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>;
+
+// Indexed dot product instructions:
+class DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty> :
+  N3Vnp<0b11100, 0b10, 0b1101, Q, U,
+        (outs Ty:$Vd), (ins Ty:$Vn, DPR:$Vm, VectorIndex32:$lane),
+        N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
+  bit lane;            // Encoding field for the $lane operand.
+  let Inst{5} = lane;  // The lane index lives in instruction bit 5.
+  let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
+}
+
+def VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR>;
+def VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR>;
+def VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR>;
+def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>;
+
+} // HasDotProd
+
 // Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point) Index: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td @@ -3964,6 +3964,7 @@ } } +let DecoderNamespace = "Thumb2CoProc" in { defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; @@ -3973,6 +3974,7 @@ defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +} //===----------------------------------------------------------------------===// @@ -4125,6 +4127,8 @@ let Inst{7-5} = opc2; let Inst{3-0} = CRm; let Inst{19-16} = CRn; + + let DecoderNamespace = "Thumb2CoProc"; } class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, @@ -4145,6 +4149,8 @@ let Inst{11-8} = cop; let Inst{7-4} = opc1; let Inst{3-0} = CRm; + + let DecoderNamespace = "Thumb2CoProc"; } /* from ARM core register to coprocessor */ @@ -4243,6 +4249,7 @@ let Inst{23-20} = opc1; let Predicates = [IsThumb2, PreV8]; + let DecoderNamespace = "Thumb2CoProc"; } def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, @@ -4268,6 +4275,7 @@ let Inst{23-20} = opc1; let Predicates = [IsThumb2, PreV8]; + let DecoderNamespace = "Thumb2CoProc"; } Index: llvm/trunk/lib/Target/ARM/ARMSchedule.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSchedule.td +++ 
llvm/trunk/lib/Target/ARM/ARMSchedule.td @@ -414,6 +414,7 @@ def IIC_VTBX2 : InstrItinClass; def IIC_VTBX3 : InstrItinClass; def IIC_VTBX4 : InstrItinClass; +def IIC_VDOTPROD : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h @@ -156,6 +156,9 @@ bool HasFPARMv8 = false; bool HasNEON = false; + /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. + bool HasDotProd = false; + /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to /// determine if NEON should actually be used. @@ -521,6 +524,7 @@ bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } + bool hasDotProd() const { return HasDotProd; } bool hasCRC() const { return HasCRC; } bool hasRAS() const { return HasRAS; } bool hasVirtualization() const { return HasVirtualization; } Index: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5348,7 +5348,8 @@ Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || - Mnemonic == "bxns" || Mnemonic == "blxns") + Mnemonic == "bxns" || Mnemonic == "blxns" || + Mnemonic == "vudot" || Mnemonic == "vsdot") return Mnemonic; // First, split out any predication code. 
Ignore mnemonics we know aren't @@ -5454,7 +5455,8 @@ Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || - Mnemonic == "vmovx" || Mnemonic == "vins") { + Mnemonic == "vmovx" || Mnemonic == "vins" || + Mnemonic == "vudot" || Mnemonic == "vsdot") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { Index: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -486,6 +486,13 @@ } } + Result = + decodeInstruction(DecoderTableCoProc32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result); + } + Size = 4; return MCDisassembler::Fail; } @@ -821,6 +828,14 @@ } } + Result = + decodeInstruction(DecoderTableThumb2CoProc32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + Check(Result, AddThumbPredicate(MI)); + return Result; + } + Size = 0; return MCDisassembler::Fail; } Index: llvm/trunk/test/MC/ARM/armv8.2a-dotprod-a32.s =================================================================== --- llvm/trunk/test/MC/ARM/armv8.2a-dotprod-a32.s +++ llvm/trunk/test/MC/ARM/armv8.2a-dotprod-a32.s @@ -0,0 +1,37 @@ +// RUN: llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK + +// RUN: not llvm-mc -triple arm -mattr=-dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s +// RUN: not llvm-mc -triple arm -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s +// RUN: not llvm-mc -triple arm -mattr=+v8.1a -show-encoding < %s 2> %t +// RUN: FileCheck 
--check-prefix=CHECK-NO-DOTPROD < %t %s +// RUN: not llvm-mc -triple arm -mattr=+v8.2a -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s + +vudot.u8 d0, d1, d2 +vsdot.s8 d0, d1, d2 +vudot.u8 q0, q1, q4 +vsdot.s8 q0, q1, q4 +vudot.u8 d0, d1, d2[0] +vsdot.s8 d0, d1, d2[1] +vudot.u8 q0, q1, d4[0] +vsdot.s8 q0, q1, d4[1] + +// CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x12,0x0d,0x21,0xfc] +// CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x02,0x0d,0x21,0xfc] +// CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x58,0x0d,0x22,0xfc] +// CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x48,0x0d,0x22,0xfc] +// CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x12,0x0d,0x21,0xfe] +// CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x22,0x0d,0x21,0xfe] +// CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x54,0x0d,0x22,0xfe] +// CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: [0x64,0x0d,0x22,0xfe] + +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod +// CHECK-NO-DOTPROD: error: instruction requires: dotprod Index: llvm/trunk/test/MC/ARM/armv8.2a-dotprod-error.s =================================================================== --- llvm/trunk/test/MC/ARM/armv8.2a-dotprod-error.s +++ llvm/trunk/test/MC/ARM/armv8.2a-dotprod-error.s @@ -0,0 +1,14 @@ +// RUN: not llvm-mc -triple arm -mattr=+dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s + +vudot.u8 d0, d1, d2[2] +vsdot.s8 d0, d1, d2[2] +vudot.u8 q0, q1, d4[2] +vsdot.s8 q0, q1, d4[2] + +// CHECK-ERROR: error: invalid 
operand for instruction +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction Index: llvm/trunk/test/MC/ARM/armv8.2a-dotprod-t32.s =================================================================== --- llvm/trunk/test/MC/ARM/armv8.2a-dotprod-t32.s +++ llvm/trunk/test/MC/ARM/armv8.2a-dotprod-t32.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple thumb -mattr=+dotprod -show-encoding < %s | FileCheck %s --check-prefix=CHECK + +// RUN: not llvm-mc -triple thumb -mattr=-dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -mattr=+v8.1a -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s +// RUN: not llvm-mc -triple thumb -mattr=+v8.2a -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s + + vudot.u8 d0, d1, d2 + vsdot.s8 d0, d1, d2 + vudot.u8 q0, q1, q4 + vsdot.s8 q0, q1, q4 + vudot.u8 d0, d1, d2[0] + vsdot.s8 d0, d1, d2[1] + vudot.u8 q0, q1, d4[0] + vsdot.s8 q0, q1, d4[1] + +//CHECK: vudot.u8 d0, d1, d2 @ encoding: [0x21,0xfc,0x12,0x0d] +//CHECK: vsdot.s8 d0, d1, d2 @ encoding: [0x21,0xfc,0x02,0x0d] +//CHECK: vudot.u8 q0, q1, q4 @ encoding: [0x22,0xfc,0x58,0x0d] +//CHECK: vsdot.s8 q0, q1, q4 @ encoding: [0x22,0xfc,0x48,0x0d] +//CHECK: vudot.u8 d0, d1, d2[0] @ encoding: [0x21,0xfe,0x12,0x0d] +//CHECK: vsdot.s8 d0, d1, d2[1] @ encoding: [0x21,0xfe,0x22,0x0d] +//CHECK: vudot.u8 q0, q1, d4[0] @ encoding: [0x22,0xfe,0x54,0x0d] +//CHECK: vsdot.s8 q0, q1, d4[1] @ encoding: [0x22,0xfe,0x64,0x0d] + +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: 
instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod +//CHECK-ERROR: error: instruction requires: dotprod + Index: llvm/trunk/test/MC/Disassembler/ARM/armv8.2a-dotprod-a32.s =================================================================== --- llvm/trunk/test/MC/Disassembler/ARM/armv8.2a-dotprod-a32.s +++ llvm/trunk/test/MC/Disassembler/ARM/armv8.2a-dotprod-a32.s @@ -0,0 +1,33 @@ +# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+dotprod --disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + +0x12,0x0d,0x21,0xfc +0x02,0x0d,0x21,0xfc +0x58,0x0d,0x22,0xfc +0x48,0x0d,0x22,0xfc +0x12,0x0d,0x21,0xfe +0x22,0x0d,0x21,0xfe +0x54,0x0d,0x22,0xfe +0x64,0x0d,0x22,0xfe + +#CHECK: vudot.u8 d0, d1, d2 +#CHECK: vsdot.s8 d0, d1, d2 +#CHECK: vudot.u8 q0, q1, q4 +#CHECK: vsdot.s8 q0, q1, q4 +#CHECK: vudot.u8 d0, d1, d2[0] +#CHECK: vsdot.s8 d0, d1, d2[1] +#CHECK: vudot.u8 q0, q1, d4[0] +#CHECK: vsdot.s8 q0, q1, d4[1] + +# without dot product enabled, the instructions get disassembled to these +# coprocessor instructions: + +#CHECK-ERROR: stc2 p13, c0, [r1], #-72 +#CHECK-ERROR: stc2 p13, c0, [r1], #-8 +#CHECK-ERROR: stc2 p13, c0, [r2], #-352 +#CHECK-ERROR: stc2 p13, c0, [r2], #-288 +#CHECK-ERROR: mcr2 p13, #1, r0, c1, c2, #0 +#CHECK-ERROR: cdp2 p13, #2, c0, c1, c2, #1 +#CHECK-ERROR: mcr2 p13, #1, r0, c2, c4, #2 +#CHECK-ERROR: cdp2 p13, #2, c0, c2, c4, #3 + Index: llvm/trunk/test/MC/Disassembler/ARM/armv8.2a-dotprod-t32.s =================================================================== --- llvm/trunk/test/MC/Disassembler/ARM/armv8.2a-dotprod-t32.s +++ llvm/trunk/test/MC/Disassembler/ARM/armv8.2a-dotprod-t32.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc -triple thumbv7a -mattr=+dotprod --disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple thumbv7a -mattr=-dotprod --disassemble < %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-ERROR + +[0x21,0xfc,0x12,0x0d] +[0x21,0xfc,0x02,0x0d] +[0x22,0xfc,0x58,0x0d] +[0x22,0xfc,0x48,0x0d] +[0x21,0xfe,0x12,0x0d] +[0x21,0xfe,0x22,0x0d] +[0x22,0xfe,0x54,0x0d] +[0x22,0xfe,0x64,0x0d] + +#CHECK: vudot.u8 d0, d1, d2 +#CHECK: vsdot.s8 d0, d1, d2 +#CHECK: vudot.u8 q0, q1, q4 +#CHECK: vsdot.s8 q0, q1, q4 +#CHECK: vudot.u8 d0, d1, d2[0] +#CHECK: vsdot.s8 d0, d1, d2[1] +#CHECK: vudot.u8 q0, q1, d4[0] +#CHECK: vsdot.s8 q0, q1, d4[1] + +#CHECK-ERROR: stc2 p13, c0, [r1], #-72 +#CHECK-ERROR: stc2 p13, c0, [r1], #-8 +#CHECK-ERROR: stc2 p13, c0, [r2], #-352 +#CHECK-ERROR: stc2 p13, c0, [r2], #-288 +#CHECK-ERROR: mcr2 p13, #1, r0, c1, c2, #0 +#CHECK-ERROR: cdp2 p13, #2, c0, c1, c2, #1 +#CHECK-ERROR: mcr2 p13, #1, r0, c2, c4, #2 +#CHECK-ERROR: cdp2 p13, #2, c0, c2, c4, #3