Index: clang/lib/Basic/Targets/AArch64.h =================================================================== --- clang/lib/Basic/Targets/AArch64.h +++ clang/lib/Basic/Targets/AArch64.h @@ -70,6 +70,8 @@ MacroBuilder &Builder) const; void getTargetDefinesARMV85A(const LangOptions &Opts, MacroBuilder &Builder) const; + void getTargetDefinesARMV86A(const LangOptions &Opts, + MacroBuilder &Builder) const; void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; Index: clang/lib/Basic/Targets/AArch64.cpp =================================================================== --- clang/lib/Basic/Targets/AArch64.cpp +++ clang/lib/Basic/Targets/AArch64.cpp @@ -178,6 +178,13 @@ getTargetDefinesARMV84A(Opts, Builder); } +void AArch64TargetInfo::getTargetDefinesARMV86A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the Armv8.5 defines + // FIXME: Armv8.6 makes some extensions mandatory. Handle them here. + getTargetDefinesARMV85A(Opts, Builder); +} + void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { @@ -286,6 +293,9 @@ case llvm::AArch64::ArchKind::ARMV8_5A: getTargetDefinesARMV85A(Opts, Builder); break; + case llvm::AArch64::ArchKind::ARMV8_6A: + getTargetDefinesARMV86A(Opts, Builder); + break; } // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work. 
@@ -340,6 +350,8 @@ ArchKind = llvm::AArch64::ArchKind::ARMV8_4A; if (Feature == "+v8.5a") ArchKind = llvm::AArch64::ArchKind::ARMV8_5A; + if (Feature == "+v8.6a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_6A; if (Feature == "+fullfp16") HasFullFP16 = true; if (Feature == "+dotprod") Index: clang/lib/Basic/Targets/ARM.cpp =================================================================== --- clang/lib/Basic/Targets/ARM.cpp +++ clang/lib/Basic/Targets/ARM.cpp @@ -199,6 +199,8 @@ return "8_4A"; case llvm::ARM::ArchKind::ARMV8_5A: return "8_5A"; + case llvm::ARM::ArchKind::ARMV8_6A: + return "8_6A"; case llvm::ARM::ArchKind::ARMV8MBaseline: return "8M_BASE"; case llvm::ARM::ArchKind::ARMV8MMainline: @@ -817,6 +819,7 @@ case llvm::ARM::ArchKind::ARMV8_3A: case llvm::ARM::ArchKind::ARMV8_4A: case llvm::ARM::ArchKind::ARMV8_5A: + case llvm::ARM::ArchKind::ARMV8_6A: getTargetDefinesARMV83A(Opts, Builder); break; } Index: clang/test/Driver/aarch64-cpus.c =================================================================== --- clang/test/Driver/aarch64-cpus.c +++ clang/test/Driver/aarch64-cpus.c @@ -589,6 +589,29 @@ // RUN: %clang -target aarch64 -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-FP16 %s // GENERICV85A-FP16: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.5a" "-target-feature" "+fullfp16" +// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s +// RUN: %clang -target aarch64 -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s +// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s +// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s +// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s +// RUN: 
%clang -target aarch64_be -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s +// GENERICV86A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a" + +// RUN: %clang -target aarch64_be -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s +// RUN: %clang -target aarch64_be -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s +// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s +// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s +// GENERICV86A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a" + +// The BFloat16 extension is a mandatory component of the Armv8.6-A extensions, but is permitted as an +// optional feature for any implementation of Armv8.2-A to Armv8.5-A (inclusive) +// RUN: %clang -target aarch64 -march=armv8.5a+bf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16 %s +// GENERICV85A-BF16: "-target-feature" "+bf16" +// RUN: %clang -target aarch64 -march=armv8.5a+bf16+nobf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16-NO-BF16 %s +// GENERICV85A-BF16-NO-BF16: "-target-feature" "-bf16" + // fullfp16 is off by default for v8a, feature must not be mentioned // RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s // RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s Index: 
clang/test/Driver/arm-cortex-cpus.c =================================================================== --- clang/test/Driver/arm-cortex-cpus.c +++ clang/test/Driver/arm-cortex-cpus.c @@ -335,6 +335,23 @@ // RUN: %clang -target arm -march=armebv8.5-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V85A %s // CHECK-BE-V85A: "-cc1"{{.*}} "-triple" "armebv8.5{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// RUN: %clang -target arm -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// RUN: %clang -target arm -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// RUN: %clang -target armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// RUN: %clang -target arm -mlittle-endian -march=armv8.6-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s +// CHECK-V86A: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic" + +// RUN: %clang -target armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s +// RUN: %clang -target armv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s +// RUN: %clang -target armeb -march=armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s +// RUN: %clang -target armeb -march=armebv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s +// RUN: %clang -target arm -march=armebv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s +// RUN: %clang -target arm -march=armebv8.6-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s +// CHECK-BE-V86A: "-cc1"{{.*}} "-triple" "armebv8.6{{.*}}" "-target-cpu" "generic" + // Once we have 
CPUs with optional v8.2-A FP16, we will need a way to turn it // on and off. Cortex-A53 is a placeholder for now. // RUN: %clang -target armv8a-linux-eabi -mcpu=cortex-a53+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-FP16 %s @@ -432,6 +449,9 @@ // RUN: %clang -target armv8a-linux-eabi -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V85A-FP16 %s // CHECK-V85A-FP16: "-cc1"{{.*}} "-triple" "armv8.5{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+fullfp16" +// RUN: %clang -target armv8a-linux-eabi -march=armv8.6-a+bf16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V86A-BF16 %s +// CHECK-V86A-BF16: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+bf16" + // RUN: %clang -target arm -march=armv8.2-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s // RUN: %clang -target arm -march=armv8.2-a+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s // RUN: %clang -target arm -march=armv8.2-a+fp16+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s Index: clang/test/Preprocessor/arm-target-features.c =================================================================== --- clang/test/Preprocessor/arm-target-features.c +++ clang/test/Preprocessor/arm-target-features.c @@ -829,5 +829,10 @@ // CHECK-V85A: #define __ARM_ARCH_8_5A__ 1 // CHECK-V85A: #define __ARM_ARCH_PROFILE 'A' +// RUN: %clang -target armv8.6a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V86A %s +// CHECK-V86A: #define __ARM_ARCH 8 +// CHECK-V86A: #define __ARM_ARCH_8_6A__ 1 +// CHECK-V86A: #define __ARM_ARCH_PROFILE 'A' + // RUN: %clang -target arm-none-none-eabi -march=armv7-m -mfpu=softvfp -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SOFTVFP %s // CHECK-SOFTVFP-NOT: #define __ARM_FP 0x Index: llvm/include/llvm/ADT/Triple.h =================================================================== --- 
llvm/include/llvm/ADT/Triple.h +++ llvm/include/llvm/ADT/Triple.h @@ -101,6 +101,7 @@ enum SubArchType { NoSubArch, + ARMSubArch_v8_6a, ARMSubArch_v8_5a, ARMSubArch_v8_4a, ARMSubArch_v8_3a, Index: llvm/include/llvm/Support/AArch64TargetParser.h =================================================================== --- llvm/include/llvm/Support/AArch64TargetParser.h +++ llvm/include/llvm/Support/AArch64TargetParser.h @@ -55,6 +55,8 @@ AEK_SVE2SHA3 = 1 << 26, AEK_SVE2BITPERM = 1 << 27, AEK_TME = 1 << 28, + AEK_BF16 = 1 << 29, + AEK_I8MM = 1 << 30, }; enum class ArchKind { Index: llvm/include/llvm/Support/AArch64TargetParser.def =================================================================== --- llvm/include/llvm/Support/AArch64TargetParser.def +++ llvm/include/llvm/Support/AArch64TargetParser.def @@ -44,6 +44,13 @@ (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD)) +AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 | + AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM)) #undef AARCH64_ARCH #ifndef AARCH64_ARCH_EXT_NAME @@ -79,6 +86,8 @@ AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16") +AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm") AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme") #undef AARCH64_ARCH_EXT_NAME Index: llvm/include/llvm/Support/ARMTargetParser.h 
=================================================================== --- llvm/include/llvm/Support/ARMTargetParser.h +++ llvm/include/llvm/Support/ARMTargetParser.h @@ -46,14 +46,15 @@ AEK_SB = 1 << 17, AEK_FP_DP = 1 << 18, AEK_LOB = 1 << 19, - AEK_CDECP0 = 1 << 20, - AEK_CDECP1 = 1 << 21, - AEK_CDECP2 = 1 << 22, - AEK_CDECP3 = 1 << 23, - AEK_CDECP4 = 1 << 24, - AEK_CDECP5 = 1 << 25, - AEK_CDECP6 = 1 << 26, - AEK_CDECP7 = 1 << 27, + AEK_BF16 = 1 << 20, + AEK_CDECP0 = 1 << 21, + AEK_CDECP1 = 1 << 22, + AEK_CDECP2 = 1 << 23, + AEK_CDECP3 = 1 << 24, + AEK_CDECP4 = 1 << 25, + AEK_CDECP5 = 1 << 26, + AEK_CDECP6 = 1 << 27, + AEK_CDECP7 = 1 << 28, // Unsupported extensions. AEK_OS = 1ULL << 59, Index: llvm/include/llvm/Support/ARMTargetParser.def =================================================================== --- llvm/include/llvm/Support/ARMTargetParser.def +++ llvm/include/llvm/Support/ARMTargetParser.def @@ -112,6 +112,11 @@ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | ARM::AEK_DOTPROD)) +ARM_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES)) ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R, FK_NEON_FP_ARMV8, (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | @@ -164,6 +169,7 @@ ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr, nullptr) ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr) ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml") +ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16", "-bf16") ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb") ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob") ARM_ARCH_EXT_NAME("cdecp0", ARM::AEK_CDECP0, 
"+cdecp0", "-cdecp0") Index: llvm/lib/Support/AArch64TargetParser.cpp =================================================================== --- llvm/lib/Support/AArch64TargetParser.cpp +++ llvm/lib/Support/AArch64TargetParser.cpp @@ -116,6 +116,8 @@ Features.push_back("+v8.4a"); if (AK == ArchKind::ARMV8_5A) Features.push_back("+v8.5a"); + if (AK == AArch64::ArchKind::ARMV8_6A) + Features.push_back("+v8.6a"); return AK != ArchKind::INVALID; } Index: llvm/lib/Support/ARMTargetParser.cpp =================================================================== --- llvm/lib/Support/ARMTargetParser.cpp +++ llvm/lib/Support/ARMTargetParser.cpp @@ -74,6 +74,7 @@ case ArchKind::ARMV8_3A: case ArchKind::ARMV8_4A: case ArchKind::ARMV8_5A: + case ArchKind::ARMV8_6A: case ArchKind::ARMV8R: case ArchKind::ARMV8MBaseline: case ArchKind::ARMV8MMainline: @@ -108,6 +109,7 @@ case ArchKind::ARMV8_3A: case ArchKind::ARMV8_4A: case ArchKind::ARMV8_5A: + case ArchKind::ARMV8_6A: return ProfileKind::A; case ArchKind::ARMV2: case ArchKind::ARMV2A: @@ -150,6 +152,7 @@ .Case("v8.3a", "v8.3-a") .Case("v8.4a", "v8.4-a") .Case("v8.5a", "v8.5-a") + .Case("v8.6a", "v8.6-a") .Case("v8r", "v8-r") .Case("v8m.base", "v8-m.base") .Case("v8m.main", "v8-m.main") Index: llvm/lib/Support/Triple.cpp =================================================================== --- llvm/lib/Support/Triple.cpp +++ llvm/lib/Support/Triple.cpp @@ -626,6 +626,8 @@ return Triple::ARMSubArch_v8_4a; case ARM::ArchKind::ARMV8_5A: return Triple::ARMSubArch_v8_5a; + case ARM::ArchKind::ARMV8_6A: + return Triple::ARMSubArch_v8_6a; case ARM::ArchKind::ARMV8R: return Triple::ARMSubArch_v8r; case ARM::ArchKind::ARMV8MBaseline: Index: llvm/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/lib/Target/AArch64/AArch64.td +++ llvm/lib/Target/AArch64/AArch64.td @@ -365,6 +365,9 @@ "true", "Use an instruction sequence for taking the address of a global " "that allows a memory tag in the 
upper address bits">; +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", + "true", "Enable BFloat16 Extension" >; + //===----------------------------------------------------------------------===// // Architectures. // @@ -391,8 +394,11 @@ "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, - FeatureBranchTargetId] ->; + FeatureBranchTargetId]>; + +def HasV8_6aOps : SubtargetFeature< + "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions", + [HasV8_5aOps, FeatureBF16]>; //===----------------------------------------------------------------------===// // Register File Description Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7786,6 +7786,110 @@ let Inst{4-0} = Rd; } + +//---------------------------------------------------------------------------- +// Armv8.6 BFloat16 Extension +//---------------------------------------------------------------------------- +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in { + +class BaseSIMDThreeSameVectorBFDot + : BaseSIMDThreeSameVectorTied { + let AsmString = !strconcat(asm, + "{\t$Rd" # kind1 # ", $Rn" # kind2 # + ", $Rm" # kind2 # "}"); +} + +multiclass SIMDThreeSameVectorBFDot { + def v4f16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64, + v2f32, v8i8>; + def v8f16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128, + v4f32, v16i8>; +} + +class BaseSIMDThreeSameVectorBF16DotI + : BaseSIMDIndexedTied { + + bits<2> idx; + let Inst{21} = idx{0}; // L + let Inst{11} = idx{1}; // H +} + +multiclass SIMDThreeSameVectorBF16DotI { + + def v4f16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h", + ".2h", V64, v2f32, v8i8>; + def v8f16 : 
BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h", + ".2h", V128, v4f32, v16i8>; +} + +class SIMDBF16MLAL + : BaseSIMDThreeSameVectorTied { // TODO: Add intrinsics + let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}"); +} + +class SIMDBF16MLALIndex + : I<(outs V128:$dst), + (ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm, + "{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst", + []>, // TODO: Add intrinsics + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<4> Rm; + bits<3> idx; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-22} = 0b00111111; + let Inst{21-20} = idx{1-0}; + let Inst{19-16} = Rm; + let Inst{15-12} = 0b1111; + let Inst{11} = idx{2}; // H + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SIMDThreeSameVectorBF16MatrixMul + : BaseSIMDThreeSameVectorTied<1, 1, 0b010, 0b11101, + V128, asm, ".4s", + []> { + let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h", + ", $Rm", ".8h", "}"); +} + +class SIMD_BFCVTN + : BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128, + "bfcvtn", ".4h", ".4s", + []>; + +class SIMD_BFCVTN2 + : BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128, + "bfcvtn2", ".8h", ".4s", + []>; + +class BF16ToSinglePrecision + : I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "", []>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-10} = 0b0001111001100011010000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} +} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0 + // ARMv8.2-A Dot Product Instructions (Indexed) class BaseSIMDThreeSameVectorDotIndex; def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<"HasV8_5aOps", "armv8.5a">; +def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, + AssemblerPredicate<"HasV8_6aOps", "armv8.6a">; def HasVH : Predicate<"Subtarget->hasVH()">, AssemblerPredicate<"FeatureVH", "vh">; @@ -142,6 +144,8 @@ AssemblerPredicate<"FeatureETE", "ete">; def HasTRBE : 
Predicate<"Subtarget->hasTRBE()">, AssemblerPredicate<"FeatureTRBE", "trbe">; +def HasBF16 : Predicate<"Subtarget->hasBF16()">, + AssemblerPredicate<"FeatureBF16", "bf16">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -746,6 +750,20 @@ defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>; } +// ARMv8.6-A BFloat +let Predicates = [HasBF16] in { +defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">; +defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">; +def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">; +def BFMLALB : SIMDBF16MLAL<0, "bfmlalb">; +def BFMLALT : SIMDBF16MLAL<1, "bfmlalt">; +def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb">; +def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt">; +def BFCVTN : SIMD_BFCVTN; +def BFCVTN2 : SIMD_BFCVTN2; +def BFCVT : BF16ToSinglePrecision<"bfcvt">; +} + // ARMv8.2-A FP16 Fused Multiply-Add Long let Predicates = [HasNEON, HasFP16FML] in { defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1160,6 +1160,18 @@ defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>; defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>; + let Predicates = [HasBF16, HasSVE] in { + def BFDOT_ZZZ : sve_bfloat_dot<"bfdot">; + def BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot">; + def BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla">; + def BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb">; + def BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt">; + def BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb">; + def BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt">; + def 
BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt">; + def BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt">; + } + // InstAliases def : InstAlias<"mov $Zd, $Zn", (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>; Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -75,6 +75,7 @@ bool HasV8_3aOps = false; bool HasV8_4aOps = false; bool HasV8_5aOps = false; + bool HasV8_6aOps = false; bool HasFPARMv8 = false; bool HasNEON = false; @@ -143,6 +144,9 @@ bool HasMTE = false; bool HasTME = false; + // Armv8.6-A Extensions + bool HasBF16 = false; + // Arm SVE2 extensions bool HasSVE2AES = false; bool HasSVE2SM4 = false; @@ -402,6 +406,9 @@ bool hasSVE2SHA3() const { return HasSVE2SHA3; } bool hasSVE2BitPerm() const { return HasSVE2BitPerm; } + // Armv8.6-A Extensions + bool hasBF16() const { return HasBF16; } + bool isLittleEndian() const { return IsLittle; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } Index: llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp =================================================================== --- llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2859,6 +2859,8 @@ Str += "ARMv8.4a"; else if (FBS[AArch64::HasV8_5aOps]) Str += "ARMv8.5a"; + else if (FBS[AArch64::HasV8_6aOps]) + Str += "ARMv8.6a"; else { auto ext = std::find_if(std::begin(ExtensionMap), std::end(ExtensionMap), @@ -5094,6 +5096,7 @@ break; case AArch64::ArchKind::ARMV8_4A: case AArch64::ArchKind::ARMV8_5A: + case AArch64::ArchKind::ARMV8_6A: RequestedExtensions.push_back("sm4"); RequestedExtensions.push_back("sha3"); RequestedExtensions.push_back("sha2"); @@ -5113,6 +5116,7 @@ break; case AArch64::ArchKind::ARMV8_4A: case AArch64::ArchKind::ARMV8_5A: + case AArch64::ArchKind::ARMV8_6A: RequestedExtensions.push_back("nosm4"); 
RequestedExtensions.push_back("nosha3"); RequestedExtensions.push_back("nosha2"); Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -7303,6 +7303,96 @@ def : SVE_1_Op_Pat(NAME)>; } +//===----------------------------------------------------------------------===// +// SVE BFloat16 Group +//===----------------------------------------------------------------------===// + +class sve_bfloat_dot_base opc, string asm, string ops, dag iops> +: I<(outs ZPR32:$Zda), iops, asm, ops, "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + let Inst{31-21} = 0b01100100011; + let Inst{15-14} = opc; + let Inst{13-10} = 0b0000; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ElementSizeH; +} + +class sve_bfloat_dot +: sve_bfloat_dot_base<0b10, asm, "\t$Zda, $Zn, $Zm", + (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm)> { + bits<5> Zm; + let Inst{20-16} = Zm; +} + +class sve_bfloat_dot_indexed +: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop", + (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> { + bits<2> iop; + bits<3> Zm; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; +} + +class sve_bfloat_matmul +: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm), + asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zm; + bits<5> Zda; + bits<5> Zn; + let Inst{31-21} = 0b01100100011; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b111001; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ElementSizeH; +} + +class sve_bfloat_matmul_longvecl +: sve_bfloat_matmul { + let Inst{23} = 0b1; + let Inst{14-13} = 0b00; + let Inst{10} = BT; +} + +class sve_bfloat_matmul_longvecl_idx +: sve_bfloat_dot_base<0b01, asm, "\t$Zda, 
$Zn, $Zm$iop", + (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop)> { + bits<3> iop; + bits<3> Zm; + let Inst{23} = 0b1; + let Inst{20-19} = iop{2-1}; + let Inst{18-16} = Zm; + let Inst{11} = iop{0}; + let Inst{10} = BT; +} + +class sve_bfloat_convert +: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn), + asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> { + bits<5> Zd; + bits<3> Pg; + bits<5> Zn; + let Inst{31-25} = 0b0110010; + let Inst{24} = N; + let Inst{23-13} = 0b10001010101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = DestructiveOther; + let hasSideEffects = 1; + let ElementSize = ElementSizeS; +} + /// Addressing modes def am_sve_indexed_s4 :ComplexPattern", [], [SDNPWantRoot]>; def am_sve_indexed_s6 :ComplexPattern", [], [SDNPWantRoot]>; Index: llvm/lib/Target/ARM/ARM.td =================================================================== --- llvm/lib/Target/ARM/ARM.td +++ llvm/lib/Target/ARM/ARM.td @@ -424,6 +424,10 @@ def FeatureSB : SubtargetFeature<"sb", "HasSB", "true", "Enable v8.5a Speculation Barrier" >; +// Armv8.6-A extensions +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true", + "Enable support for BFloat16 instructions", [FeatureNEON]>; + // Armv8.1-M extensions def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true", @@ -523,6 +527,10 @@ "Support ARM v8.5a instructions", [HasV8_4aOps, FeatureSB]>; +def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true", + "Support ARM v8.6a instructions", + [HasV8_5aOps, FeatureBF16]>; + def HasV8_1MMainlineOps : SubtargetFeature< "v8.1m.main", "HasV8_1MMainlineOps", "true", "Support ARM v8-1M Mainline instructions", @@ -797,6 +805,19 @@ FeatureCRC, FeatureRAS, FeatureDotProd]>; +def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + 
FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, Index: llvm/lib/Target/ARM/ARMInstrNEON.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrNEON.td +++ llvm/lib/Target/ARM/ARMInstrNEON.td @@ -8926,3 +8926,95 @@ (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; + +// ARMv8.6a BFloat16 instructions. +let Predicates = [HasBF16, HasNEON] in { +class BF16VDOT op27_23, bits<2> op21_20, bit op6, + dag oops, dag iops> + : N3Vnp +{ + + let hasNoSchedulingInfo = 1; + let DecoderNamespace = "VFPV8"; +} + +class BF16VDOTS + : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst), + (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm)> { + let Constraints = "$dst = $Vd"; + let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm"); + let DecoderNamespace = "VFPV8"; +} + +multiclass BF16VDOTI { + + def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst), + (ins RegTy:$Vd, RegTy:$Vn, + DPR_VFP2:$Vm, VectorIndex32:$lane)> { + bit lane; + let Inst{5} = lane; + let Constraints = "$dst = $Vd"; + let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane"); + let DecoderNamespace = "VFPV8"; + } + +} + +def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v8i8>; +def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v16i8>; + +defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v8i8, (v2f32 DPR_VFP2:$Vm)>; +defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; + +class BF16MM + : N3Vnp<0b11000, 0b00, 0b1100, Q, 0, + (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), + N3RegFrm, IIC_VDOTPROD, "", "", []> { + let Constraints = "$dst = $Vd"; + let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm"); + let DecoderNamespace = "VFPV8"; + let hasNoSchedulingInfo = 1; +} + +def VMMLA : BF16MM<1, QPR, "vmmla">; + +class 
VBF16MALQ + : N3VCP8<0b00, 0b11, T, 1, + (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), + NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "", + []> { // TODO: Add intrinsics + let Constraints = "$dst = $Vd"; + let DecoderNamespace = "VFPV8"; + let hasNoSchedulingInfo = 1; +} + +def VBF16MALTQ: VBF16MALQ<1, "t">; +def VBF16MALBQ: VBF16MALQ<0, "b">; + +multiclass VBF16MALQI { + def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst), + (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx), + IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> { + bits<2> idx; + let Inst{5} = idx{1}; + let Inst{3} = idx{0}; + let Constraints = "$dst = $Vd"; + let DecoderNamespace = "VFPV8"; + let hasNoSchedulingInfo = 1; + } + +} + +defm VBF16MALTQI: VBF16MALQI<1, "t">; +defm VBF16MALBQI: VBF16MALQI<0, "b">; + +let hasNoSchedulingInfo = 1 in { +def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0, + (outs DPR:$Vd), (ins QPR:$Vm), + NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>; +} +} +// End of BFloat16 instructions Index: llvm/lib/Target/ARM/ARMInstrVFP.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrVFP.td +++ llvm/lib/Target/ARM/ARMInstrVFP.td @@ -1867,6 +1867,35 @@ } // End of 'let Constraints = "$a = $dst" in' +// BFloat16 - Single precision, unary, predicated +class BF16_VCVT op7_6> + : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm), + VFPUnaryFrm, NoItinerary, + opc, ".bf16.f32\t$Sd, $Sm", []>, + RegConstraint<"$dst = $Sd">, + Requires<[HasBF16]>, + Sched<[]> { + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = 0b11101; // opcode1 + let Inst{21-20} = 0b11; // opcode2 + let Inst{19-16} = 0b0011; // opcode3 + let Inst{11-8} = 0b1001; + let Inst{7-6} = op7_6; + let Inst{4} = 0; + let DecoderNamespace = "VFPV8"; +} + +def BF16_VCVTB : BF16_VCVT<"vcvtb", 0b01>; +def BF16_VCVTT : BF16_VCVT<"vcvtt", 0b11>; + //===----------------------------------------------------------------------===// // FP Multiply-Accumulate Operations. // Index: llvm/lib/Target/ARM/ARMPredicates.td =================================================================== --- llvm/lib/Target/ARM/ARMPredicates.td +++ llvm/lib/Target/ARM/ARMPredicates.td @@ -72,6 +72,8 @@ AssemblerPredicate<"HasV8_4aOps", "armv8.4a">; def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<"HasV8_5aOps", "armv8.5a">; +def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, + AssemblerPredicate<"HasV8_6aOps", "armv8.6a">; def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">, AssemblerPredicate<"FeatureVFP2_SP", "VFP2">; @@ -106,6 +108,8 @@ AssemblerPredicate<"FeatureFullFP16","full half-float">; def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">, AssemblerPredicate<"FeatureFP16FML","full half-float fml">; +def HasBF16 : Predicate<"Subtarget->hasBF16()">, + AssemblerPredicate<"FeatureBF16","BFloat16 floating point extension">; def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, Index: llvm/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/lib/Target/ARM/ARMSubtarget.h +++ llvm/lib/Target/ARM/ARMSubtarget.h @@ -108,6 +108,7 @@ ARMv83a, ARMv84a, ARMv85a, + ARMv86a, ARMv8a, ARMv8mBaseline, ARMv8mMainline, @@ -157,6 +158,7 @@ bool HasV8_3aOps = 
false; bool HasV8_4aOps = false; bool HasV8_5aOps = false; + bool HasV8_6aOps = false; bool HasV8MBaselineOps = false; bool HasV8MMainlineOps = false; bool HasV8_1MMainlineOps = false; @@ -255,6 +257,9 @@ /// HasFP16FML - True if subtarget supports half-precision FP fml operations bool HasFP16FML = false; + /// HasBF16 - True if subtarget supports BFloat16 floating point + bool HasBF16 = false; + /// HasD32 - True if subtarget has the full 32 double precision /// FP registers for VFPv3. bool HasD32 = false; @@ -581,6 +586,7 @@ bool hasV8_3aOps() const { return HasV8_3aOps; } bool hasV8_4aOps() const { return HasV8_4aOps; } bool hasV8_5aOps() const { return HasV8_5aOps; } + bool hasV8_6aOps() const { return HasV8_6aOps; } bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; } Index: llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp =================================================================== --- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6307,6 +6307,7 @@ Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || Mnemonic == "bxns" || Mnemonic == "blxns" || + Mnemonic == "vdot" || Mnemonic == "vmmla" || Mnemonic == "vudot" || Mnemonic == "vsdot" || Mnemonic == "vcmla" || Mnemonic == "vcadd" || Mnemonic == "vfmal" || Mnemonic == "vfmsl" || @@ -6447,6 +6448,8 @@ Mnemonic == "vudot" || Mnemonic == "vsdot" || Mnemonic == "vcmla" || Mnemonic == "vcadd" || Mnemonic == "vfmal" || Mnemonic == "vfmsl" || + Mnemonic == "vfmat" || Mnemonic == "vfmab" || + Mnemonic == "vdot" || Mnemonic == "vmmla" || Mnemonic == "sb" || Mnemonic == "ssbb" || Mnemonic == "pssbb" || Mnemonic == "bfcsel" || Mnemonic == "wls" || Index: llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp 
=================================================================== --- llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -856,6 +856,7 @@ case ARM::ArchKind::ARMV8_3A: case ARM::ArchKind::ARMV8_4A: case ARM::ArchKind::ARMV8_5A: + case ARM::ArchKind::ARMV8_6A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); Index: llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,bf16 2>&1 < %s| FileCheck %s + +bfcvt z0.s, p0/m, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvt z0.s, p0/m, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.h, p0/m, z1.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvt z0.h, p0/m, z1.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.h, p0/z, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfcvt z0.h, p0/z, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.h, p8/m, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfcvt z0.h, p8/m, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.h, p0/m, z7.h +bfcvt z0.h, p0/m, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size +// CHECK-NEXT: bfcvt z0.h, p0/m, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: llvm/test/MC/AArch64/SVE/bfcvt.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfcvt.s @@ -0,0 +1,29 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sve,+bf16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +bfcvt z0.H, p0/m, z1.S +// CHECK-INST: bfcvt z0.h, p0/m, z1.s +// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0.S, p0/m, z2.S +// CHECK-INST: movprfx z0.s, p0/m, z2.s +// CHECK-ENCODING: [0x40,0x20,0x91,0x04] +// CHECK-ERROR: instruction requires: sve + +bfcvt z0.H, p0/m, z1.S +// CHECK-INST: bfcvt z0.h, p0/m, z1.s +// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z2 +// CHECK-INST: movprfx z0, z2 +// CHECK-ENCODING: [0x40,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfcvt z0.H, p0/m, z1.S +// CHECK-INST: bfcvt z0.h, p0/m, z1.s +// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65] +// CHECK-ERROR: instruction requires: bf16 sve Index: llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,bf16 2>&1 < %s| FileCheck %s + +bfcvtnt z0.s, p0/m, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvtnt z0.s, p0/m, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvtnt z0.h, p0/m, z1.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvtnt z0.h, p0/z, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfcvtnt z0.h, p0/z, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvtnt z0.h, p8/m, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + 
+movprfx z0.h, p0/m, z7.h +bfcvtnt z0.h, p0/m, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size +// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: llvm/test/MC/AArch64/SVE/bfcvtnt.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfcvtnt.s @@ -0,0 +1,29 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +bfcvtnt z0.H, p0/m, z1.S +// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s +// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0.S, p0/m, z2.S +// CHECK-INST: movprfx z0.s, p0/m, z2.s +// CHECK-ENCODING: [0x40,0x20,0x91,0x04] +// CHECK-ERROR: instruction requires: sve + +bfcvtnt z0.H, p0/m, z1.S +// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s +// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z2 +// CHECK-INST: movprfx z0, z2 +// CHECK-ENCODING: [0x40,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfcvtnt z0.H, p0/m, z1.S +// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s +// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64] +// CHECK-ERROR: instruction requires: bf16 sve Index: llvm/test/MC/AArch64/SVE/bfdot-diagnostics.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfdot-diagnostics.s @@ -0,0 +1,53 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,bf16 2>&1 < %s| FileCheck %s + +bfdot z0.s, z1.s, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfdot z0.s, z1.s, z2.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot z0.h, z1.h, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element 
width +// CHECK-NEXT: bfdot z0.h, z1.h, z2.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot z0.s, z1.h, z2.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h +// CHECK-NEXT: bfdot z0.s, z1.h, z2.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.s, p0/m, z7.s +bfdot z0.s, z1.h, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: bfdot z0.s, z1.h, z2.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot z0.s, z1.s, z2.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfdot z0.s, z1.s, z2.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot z0.h, z1.h, z2.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfdot z0.h, z1.h, z2.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot z0.s, z1.h, z2.s[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h +// CHECK-NEXT: bfdot z0.s, z1.h, z2.s[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot z0.s, z1.h, z8.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfdot z0.s, z1.h, z8.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot z0.s, z1.h, z2.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.s, p0/m, z7.s +bfdot z0.s, z1.h, z2.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: llvm/test/MC/AArch64/SVE/bfdot.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfdot.s @@ -0,0 +1,52 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +bfdot z0.S, z1.H, z2.H +// CHECK-INST: bfdot z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0x80,0x62,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfdot z0.S, z1.H, z2.H[0] +// CHECK-INST: bfdot z0.s, z1.h, z2.h[0] +// CHECK-ENCODING: [0x20,0x40,0x62,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfdot z0.S, z1.H, z2.H[3] +// CHECK-INST: bfdot z0.s, z1.h, z2.h[3] +// CHECK-ENCODING: [0x20,0x40,0x7a,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +// --------------------------------------------------------------------------// +// Test compatibility with MOVPRFX instruction. 
+ +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfdot z0.S, z1.H, z2.H +// CHECK-INST: bfdot z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0x80,0x62,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfdot z0.S, z1.H, z2.H[0] +// CHECK-INST: bfdot z0.s, z1.h, z2.h[0] +// CHECK-ENCODING: [0x20,0x40,0x62,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfdot z0.S, z1.H, z2.H[3] +// CHECK-INST: bfdot z0.s, z1.h, z2.h[3] +// CHECK-ENCODING: [0x20,0x40,0x7a,0x64] +// CHECK-ERROR: instruction requires: bf16 sve Index: llvm/test/MC/AArch64/SVE/bfmlal-diagnostics.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfmlal-diagnostics.s @@ -0,0 +1,42 @@ +// RUN: not llvm-mc -o - -triple=aarch64 -mattr=+sve,bf16 2>&1 %s | FileCheck %s + +bfmlalb z0.S, z1.H, z7.H[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: bfmlalb z0.S, z1.H, z7.H[8] +// CHECK-NEXT: ^ + +bfmlalb z0.S, z1.H, z8.H[7] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmlalb z0.S, z1.H, z8.H[7] +// CHECK-NEXT: ^ + +bfmlalt z0.S, z1.H, z7.H[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. 
+// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.H[8] +// CHECK-NEXT: ^ + +bfmlalt z0.S, z1.H, z8.H[7] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmlalt z0.S, z1.H, z8.H[7] +// CHECK-NEXT: ^ + +bfmlalt z0.S, z1.H, z7.2h[2] +// CHECK: error: invalid vector kind qualifier +// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.2h[2] +// CHECK-NEXT: ^ + +bfmlalt z0.S, z1.H, z2.s[2] +// CHECK: error: Invalid restricted vector register, expected z0.h..z7.h +// CHECK-NEXT: bfmlalt z0.S, z1.H, z2.s[2] +// CHECK-NEXT: ^ + +bfmlalt z0.S, z1.s, z2.h[2] +// CHECK: error: invalid element width +// CHECK-NEXT: bfmlalt z0.S, z1.s, z2.h[2] +// CHECK-NEXT: ^ + +movprfx z0.s, p0/m, z7.s +bfmlalt z0.s, z1.h, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx +// CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h +// CHECK-NEXT: ^ Index: llvm/test/MC/AArch64/SVE/bfmlal.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfmlal.s @@ -0,0 +1,157 @@ +// RUN: llvm-mc -o - -triple=aarch64 -show-encoding -mattr=+sve,+bf16 %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -o - -triple=aarch64 -show-encoding %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +bfmlalb z0.S, z1.H, z2.H +// CHECK-INST: bfmlalb z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0x80,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalt z0.S, z1.H, z2.H +// CHECK-INST: bfmlalt z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0x84,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalb z0.S, z1.H, z2.H[0] +// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0] +// CHECK-ENCODING: [0x20,0x40,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalt z0.S, z1.H, z2.H[0] +// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0] +// CHECK-ENCODING: [0x20,0x44,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalb z0.S, 
z1.H, z2.H[7] +// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7] +// CHECK-ENCODING: [0x20,0x48,0xfa,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalt z0.S, z1.H, z2.H[7] +// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7] +// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalt z0.S, z1.H, z7.H[7] +// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7] +// CHECK-ENCODING: [0x20,0x4c,0xff,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalb z10.S, z21.H, z14.H +// CHECK-INST: bfmlalb z10.s, z21.h, z14.h +// CHECK-ENCODING: [0xaa,0x82,0xee,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalt z14.S, z10.H, z21.H +// CHECK-INST: bfmlalt z14.s, z10.h, z21.h +// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +bfmlalb z21.s, z14.h, z3.h[2] +// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2] +// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +// --------------------------------------------------------------------------// +// Test compatibility with MOVPRFX instruction. 
+ +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalb z0.S, z1.H, z2.H +// CHECK-INST: bfmlalb z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0x80,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalt z0.S, z1.H, z2.H +// CHECK-INST: bfmlalt z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0x84,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalb z0.S, z1.H, z2.H[0] +// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0] +// CHECK-ENCODING: [0x20,0x40,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalt z0.S, z1.H, z2.H[0] +// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0] +// CHECK-ENCODING: [0x20,0x44,0xe2,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalb z0.S, z1.H, z2.H[7] +// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7] +// CHECK-ENCODING: [0x20,0x48,0xfa,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalt z0.S, z1.H, z2.H[7] +// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7] +// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalt z0.S, z1.H, z7.H[7] +// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7] +// 
CHECK-ENCODING: [0x20,0x4c,0xff,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z10, z7 +// CHECK-INST: movprfx z10, z7 +// CHECK-ENCODING: [0xea,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalb z10.S, z21.H, z14.H +// CHECK-INST: bfmlalb z10.s, z21.h, z14.h +// CHECK-ENCODING: [0xaa,0x82,0xee,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z14, z7 +// CHECK-INST: movprfx z14, z7 +// CHECK-ENCODING: [0xee,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalt z14.S, z10.H, z21.H +// CHECK-INST: bfmlalt z14.s, z10.h, z21.h +// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +movprfx z21, z7 +// CHECK-INST: movprfx z21, z7 +// CHECK-ENCODING: [0xf5,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmlalb z21.s, z14.h, z3.h[2] +// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2] +// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64] +// CHECK-ERROR: instruction requires: bf16 sve Index: llvm/test/MC/AArch64/SVE/bfmmla-diagnostics.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfmmla-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,bf16 2>&1 < %s| FileCheck %s + +bfmmla z0.s, z1.s, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmmla z0.s, z1.s, z2.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmmla z0.h, z1.h, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmmla z0.h, z1.h, z2.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmmla z0.s, z1.h, z2.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmmla z0.s, z1.h, z2.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.s, p0/m, z7.s +bfmmla z0.s, z1.h, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: 
bfmmla z0.s, z1.h, z2.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: llvm/test/MC/AArch64/SVE/bfmmla.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/SVE/bfmmla.s @@ -0,0 +1,22 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +bfmmla z0.S, z1.H, z2.H +// CHECK-INST: bfmmla z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0xe4,0x62,0x64] +// CHECK-ERROR: instruction requires: bf16 sve + +// --------------------------------------------------------------------------// +// Test compatibility with MOVPRFX instruction. + +movprfx z0, z7 +// CHECK-INST: movprfx z0, z7 +// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04] +// CHECK-ERROR: instruction requires: sve + +bfmmla z0.S, z1.H, z2.H +// CHECK-INST: bfmmla z0.s, z1.h, z2.h +// CHECK-ENCODING: [0x20,0xe4,0x62,0x64] +// CHECK-ERROR: instruction requires: bf16 sve Index: llvm/test/MC/AArch64/armv8.6a-bf16.s =================================================================== --- /dev/null +++ llvm/test/MC/AArch64/armv8.6a-bf16.s @@ -0,0 +1,115 @@ +// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+bf16 < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a < %s | FileCheck %s +// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-bf16 < %s 2>&1 | FileCheck %s --check-prefix=NOBF16 +// RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=NOBF16 + + +bfdot v2.2s, v3.4h, v4.4h +bfdot v2.4s, v3.8h, v4.8h +// CHECK: bfdot v2.2s, v3.4h, v4.4h // encoding: [0x62,0xfc,0x44,0x2e] +// CHECK: bfdot v2.4s, v3.8h, v4.8h // encoding: [0x62,0xfc,0x44,0x6e] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.4h +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.4s, v3.8h, 
v4.8h + +bfdot v2.2s, v3.4h, v4.2h[0] +bfdot v2.2s, v3.4h, v4.2h[1] +bfdot v2.2s, v3.4h, v4.2h[2] +bfdot v2.2s, v3.4h, v4.2h[3] +// CHECK: bfdot v2.2s, v3.4h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x0f] +// CHECK: bfdot v2.2s, v3.4h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x0f] +// CHECK: bfdot v2.2s, v3.4h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x0f] +// CHECK: bfdot v2.2s, v3.4h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x0f] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[0] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[1] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[2] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[3] + + +bfdot v2.4s, v3.8h, v4.2h[0] +bfdot v2.4s, v3.8h, v4.2h[1] +bfdot v2.4s, v3.8h, v4.2h[2] +bfdot v2.4s, v3.8h, v4.2h[3] +// CHECK: bfdot v2.4s, v3.8h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x4f] +// CHECK: bfdot v2.4s, v3.8h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x4f] +// CHECK: bfdot v2.4s, v3.8h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x4f] +// CHECK: bfdot v2.4s, v3.8h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x4f] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[0] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[1] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[2] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[3] + + +bfmmla v2.4s, v3.8h, v4.8h +bfmmla v3.4s, v4.8h, v5.8h +// CHECK: bfmmla v2.4s, v3.8h, v4.8h // encoding: [0x62,0xec,0x44,0x6e] +// CHECK: bfmmla v3.4s, v4.8h, v5.8h // encoding: [0x83,0xec,0x45,0x6e] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfmmla v2.4s, v3.8h, v4.8h +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfmmla v3.4s, v4.8h, v5.8h + +bfcvtn v5.4h, v5.4s +bfcvtn2 v5.8h, v5.4s +// CHECK: bfcvtn v5.4h, v5.4s // encoding: 
[0xa5,0x68,0xa1,0x0e] +// CHECK: bfcvtn2 v5.8h, v5.4s // encoding: [0xa5,0x68,0xa1,0x4e] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfcvtn v5.4h, v5.4s +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfcvtn2 v5.8h, v5.4s + +bfcvt h5, s3 +// CHECK: bfcvt h5, s3 // encoding: [0x65,0x40,0x63,0x1e] +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfcvt h5, s3 + +bfmlalb V10.4S, V21.8h, V14.8H +bfmlalt V21.4S, V14.8h, V10.8H +// CHECK: bfmlalb v10.4s, v21.8h, v14.8h // encoding: [0xaa,0xfe,0xce,0x2e] +// CHECK-NEXT: bfmlalt v21.4s, v14.8h, v10.8h // encoding: [0xd5,0xfd,0xca,0x6e] +// NOBF16: error: instruction requires: bf16 +// NOBF16-NEXT: bfmlalb V10.4S, V21.8h, V14.8H +// NOBF16-NEXT: ^ +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfmlalt V21.4S, V14.8h, V10.8H +// NOBF16-NEXT: ^ + +bfmlalb V14.4S, V21.8H, V10.H[1] +bfmlalb V14.4S, V21.8H, V10.H[2] +bfmlalb V14.4S, V21.8H, V10.H[7] +bfmlalt V21.4S, V10.8H, V14.H[1] +bfmlalt V21.4S, V10.8H, V14.H[2] +bfmlalt V21.4S, V10.8H, V14.H[7] +// CHECK: bfmlalb v14.4s, v21.8h, v10.h[1] // encoding: [0xae,0xf2,0xda,0x0f] +// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[2] // encoding: [0xae,0xf2,0xea,0x0f] +// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[7] // encoding: [0xae,0xfa,0xfa,0x0f] +// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[1] // encoding: [0x55,0xf1,0xde,0x4f] +// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[2] // encoding: [0x55,0xf1,0xee,0x4f] +// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[7] // encoding: [0x55,0xf9,0xfe,0x4f] +// NOBF16: error: instruction requires: bf16 +// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[1] +// NOBF16-NEXT: ^ +// NOBF16: error: instruction requires: bf16 +// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[2] +// NOBF16-NEXT: ^ +// NOBF16: error: instruction requires: bf16 +// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[7] +// NOBF16-NEXT: ^ +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[1] +// NOBF16-NEXT: ^ +// NOBF16: 
instruction requires: bf16 +// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[2] +// NOBF16-NEXT: ^ +// NOBF16: instruction requires: bf16 +// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[7] +// NOBF16-NEXT: ^ Index: llvm/test/MC/ARM/bfloat16-a32-errors.s =================================================================== --- /dev/null +++ llvm/test/MC/ARM/bfloat16-a32-errors.s @@ -0,0 +1,57 @@ +// RUN: not llvm-mc -triple arm -mattr=+bf16,-neon %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL +// RUN: not llvm-mc -triple arm -mattr=-bf16 %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NOBF16,ALL +// RUN: not llvm-mc -triple arm %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL +// +vdot.bf16 d3, d4, d5 +vdot.bf16 q0, q1, q2 +vdot.bf16 d3, d4, d5[1] +vdot.bf16 q0, q1, d5[1] +vmmla.bf16 q0, q1, q2 +vcvt.bf16.f32 d1, q3 +vcvtbeq.bf16.f32 s1, s3 +vcvttne.bf16.f32 s1, s3 +// NOBF16: error: instruction requires: BFloat16 floating point extension +// NOBF16-NEXT: vdot.bf16 d3, d4, d5 +// NOBF16-NEXT: ^ +// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension +// NOBF16-NEXT: vdot.bf16 q0, q1, q2 +// NOBF16-NEXT: ^ +// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension +// NOBF16-NEXT: vdot.bf16 d3, d4, d5[1] +// NOBF16-NEXT: ^ +// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension +// NOBF16-NEXT: vdot.bf16 q0, q1, d5[1] +// NOBF16-NEXT: ^ +// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension +// NOBF16-NEXT: vmmla.bf16 q0, q1, q2 +// NOBF16-NEXT: ^ +// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension +// NOBF16-NEXT: vcvt.bf16.f32 d1, q3 +// NOBF16-NEXT: ^ + +// NONEON: error: instruction requires: BFloat16 floating point extension NEON +// NONEON-NEXT: vdot.bf16 d3, d4, d5 +// NONEON-NEXT: ^ +// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON +// NONEON-NEXT: vdot.bf16 q0, q1, q2 +// 
NONEON-NEXT: ^ +// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON +// NONEON-NEXT: vdot.bf16 d3, d4, d5[1] +// NONEON-NEXT: ^ +// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON +// NONEON-NEXT: vdot.bf16 q0, q1, d5[1] +// NONEON-NEXT: ^ +// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON +// NONEON-NEXT: vmmla.bf16 q0, q1, q2 +// NONEON-NEXT: ^ +// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON +// NONEON-NEXT: vcvt.bf16.f32 d1, q3 +// NONEON-NEXT: ^ + + +// ALL-NEXT: error: instruction requires: BFloat16 floating point extension +// ALL-NEXT: vcvtbeq.bf16.f32 s1, s3 +// ALL-NEXT: ^ +// ALL-NEXT: error: instruction requires: BFloat16 floating point extension +// ALL-NEXT: vcvttne.bf16.f32 s1, s3 +// ALL-NEXT: ^ Index: llvm/test/MC/ARM/bfloat16-a32-errors2.s =================================================================== --- /dev/null +++ llvm/test/MC/ARM/bfloat16-a32-errors2.s @@ -0,0 +1,134 @@ +// RUN: not llvm-mc -o - -triple arm -mattr=+v8.6a -show-encoding %s 2>&1 | FileCheck %s +vfmat.bf16 d0, d0, d0 +vfmat.bf16 d0, d0, q0 +vfmat.bf16 d0, q0, d0 +vfmat.bf16 q0, d0, d0 +vfmat.bf16 q0, q0, d0 +vfmat.bf16 q0, d0, q0 +vfmat.bf16 d0, q0, q0 +vfmat.bf16 q0, q0, q0[3] +vfmat.bf16 q0, q0, q0[3] +vfmat.bf16 q0, d0, d0[0] +vfmat.bf16 d0, q0, d0[0] +vfmat.bf16 q0, d0, d0[9] + +vfmab.bf16 d0, d0, d0 +vfmab.bf16 d0, d0, q0 +vfmab.bf16 d0, q0, d0 +vfmab.bf16 q0, d0, d0 +vfmab.bf16 q0, q0, d0 +vfmab.bf16 q0, d0, q0 +vfmab.bf16 d0, q0, q0 +vfmab.bf16 q0, q0, q0[3] +vfmab.bf16 q0, q0, q0[3] +vfmab.bf16 q0, d0, d0[0] +vfmab.bf16 d0, q0, d0[0] +vfmab.bf16 q0, d0, d0[9] + +//CHECK:error: invalid instruction +//CHECK-NEXT:vfmat.bf16 d0, d0, d0 +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmat.bf16 d0, d0, q0 +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmat.bf16 d0, q0, d0 +//CHECK-NEXT:^ 
+//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmat.bf16 q0, d0, d0 +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction, any one of the following would fix this: +//CHECK-NEXT:vfmat.bf16 q0, q0, d0 +//CHECK-NEXT:^ +//CHECK-NEXT:note: too few operands for instruction +//CHECK-NEXT:vfmat.bf16 q0, q0, d0 +//CHECK-NEXT: ^ +//CHECK-NEXT:note: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmat.bf16 q0, q0, d0 +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmat.bf16 q0, d0, q0 +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmat.bf16 d0, q0, q0 +//CHECK-NEXT: ^ +//CHECK-NEXT:error: invalid instruction, any one of the following would fix this: +//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3] +//CHECK-NEXT:^ +//CHECK-NEXT:note: operand must be a register in range [d0, d7] +//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:note: too many operands for instruction +//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: invalid instruction, any one of the following would fix this: +//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3] +//CHECK-NEXT:^ +//CHECK-NEXT:note: operand must be a register in range [d0, d7] +//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:note: too many operands for instruction +//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmat.bf16 q0, d0, d0[0] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmat.bf16 d0, q0, d0[0] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmat.bf16 q0, d0, d0[9] +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmab.bf16 d0, d0, d0 +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmab.bf16 d0, d0, q0 +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction 
+//CHECK-NEXT:vfmab.bf16 d0, q0, d0 +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmab.bf16 q0, d0, d0 +//CHECK-NEXT:^ +//CHECK-NEXT:error: invalid instruction, any one of the following would fix this: +//CHECK-NEXT:vfmab.bf16 q0, q0, d0 +//CHECK-NEXT:^ +//CHECK-NEXT:note: too few operands for instruction +//CHECK-NEXT:vfmab.bf16 q0, q0, d0 +//CHECK-NEXT: ^ +//CHECK-NEXT:note: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmab.bf16 q0, q0, d0 +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmab.bf16 q0, d0, q0 +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmab.bf16 d0, q0, q0 +//CHECK-NEXT: ^ +//CHECK-NEXT:error: invalid instruction, any one of the following would fix this: +//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3] +//CHECK-NEXT:^ +//CHECK-NEXT:note: operand must be a register in range [d0, d7] +//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:note: too many operands for instruction +//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: invalid instruction, any one of the following would fix this: +//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3] +//CHECK-NEXT:^ +//CHECK-NEXT:note: operand must be a register in range [d0, d7] +//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:note: too many operands for instruction +//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmab.bf16 q0, d0, d0[0] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: operand must be a register in range [q0, q15] +//CHECK-NEXT:vfmab.bf16 d0, q0, d0[0] +//CHECK-NEXT: ^ +//CHECK-NEXT:error: invalid instruction +//CHECK-NEXT:vfmab.bf16 q0, d0, d0[9] Index: llvm/test/MC/ARM/bfloat16-a32.s =================================================================== --- /dev/null +++ llvm/test/MC/ARM/bfloat16-a32.s @@ -0,0 +1,55 @@ +// RUN: llvm-mc 
-triple arm -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK +// RUN: llvm-mc -triple arm -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK + +vdot.bf16 d3, d4, d5 +// CHECK: vdot.bf16 d3, d4, d5 @ encoding: [0x05,0x3d,0x04,0xfc] +vdot.bf16 q0, q1, q2 +// CHECK-NEXT: vdot.bf16 q0, q1, q2 @ encoding: [0x44,0x0d,0x02,0xfc] +vdot.bf16 d3, d4, d5[1] +// CHECK-NEXT: vdot.bf16 d3, d4, d5[1] @ encoding: [0x25,0x3d,0x04,0xfe] +vdot.bf16 q0, q1, d5[1] +// CHECK-NEXT: vdot.bf16 q0, q1, d5[1] @ encoding: [0x65,0x0d,0x02,0xfe] +vmmla.bf16 q0, q1, q2 +// CHECK-NEXT: vmmla.bf16 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xfc] +vcvt.bf16.f32 d1, q3 +// CHECK-NEXT: vcvt.bf16.f32 d1, q3 @ encoding: [0x46,0x16,0xb6,0xf3] +vcvtbeq.bf16.f32 s1, s3 +// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0x61,0x09,0xf3,0x0e] +vcvttne.bf16.f32 s1, s3 +// CHECK-NEXT: vcvttne.bf16.f32 s1, s3 @ encoding: [0xe1,0x09,0xf3,0x1e] +vfmat.bf16 q0, q0, q0 +//CHECK-NEXT: vfmat.bf16 q0, q0, q0 @ encoding: [0x50,0x08,0x30,0xfc] +vfmat.bf16 q0, q0, q15 +//CHECK-NEXT: vfmat.bf16 q0, q0, q15 @ encoding: [0x7e,0x08,0x30,0xfc] +vfmat.bf16 q0, q15, q0 +//CHECK-NEXT: vfmat.bf16 q0, q15, q0 @ encoding: [0xd0,0x08,0x3e,0xfc] +vfmat.bf16 q0, q15, q15 +//CHECK-NEXT: vfmat.bf16 q0, q15, q15 @ encoding: [0xfe,0x08,0x3e,0xfc] +vfmat.bf16 q7, q0, q0 +//CHECK-NEXT: vfmat.bf16 q7, q0, q0 @ encoding: [0x50,0xe8,0x30,0xfc] +vfmat.bf16 q8, q0, q0 +//CHECK-NEXT: vfmat.bf16 q8, q0, q0 @ encoding: [0x50,0x08,0x70,0xfc] +vfmab.bf16 q0, q0, q0 +//CHECK-NEXT: vfmab.bf16 q0, q0, q0 @ encoding: [0x10,0x08,0x30,0xfc] +vfmab.bf16 q0, q0, q15 +//CHECK-NEXT: vfmab.bf16 q0, q0, q15 @ encoding: [0x3e,0x08,0x30,0xfc] +vfmab.bf16 q0, q15, q0 +//CHECK-NEXT: vfmab.bf16 q0, q15, q0 @ encoding: [0x90,0x08,0x3e,0xfc] +vfmab.bf16 q0, q15, q15 +//CHECK-NEXT: vfmab.bf16 q0, q15, q15 @ encoding: [0xbe,0x08,0x3e,0xfc] +vfmab.bf16 q7, q0, q0 +//CHECK-NEXT: vfmab.bf16 q7, q0, q0 @ encoding: 
[0x10,0xe8,0x30,0xfc] +vfmab.bf16 q8, q0, q0 +//CHECK-NEXT: vfmab.bf16 q8, q0, q0 @ encoding: [0x10,0x08,0x70,0xfc] +vfmat.bf16 q0, q0, d0[0] +//CHECK-NEXT: vfmat.bf16 q0, q0, d0[0] @ encoding: [0x50,0x08,0x30,0xfe] +vfmat.bf16 q0, q0, d0[3] +//CHECK-NEXT: vfmat.bf16 q0, q0, d0[3] @ encoding: [0x78,0x08,0x30,0xfe] +vfmat.bf16 q0, q0, d7[0] +//CHECK-NEXT: vfmat.bf16 q0, q0, d7[0] @ encoding: [0x57,0x08,0x30,0xfe] +vfmab.bf16 q0, q0, d0[0] +//CHECK-NEXT: vfmab.bf16 q0, q0, d0[0] @ encoding: [0x10,0x08,0x30,0xfe] +vfmab.bf16 q0, q0, d0[3] +//CHECK-NEXT: vfmab.bf16 q0, q0, d0[3] @ encoding: [0x38,0x08,0x30,0xfe] +vfmab.bf16 q0, q0, d7[0] +//CHECK-NEXT: vfmab.bf16 q0, q0, d7[0] @ encoding: [0x17,0x08,0x30,0xfe] Index: llvm/test/MC/ARM/bfloat16-t32-errors.s =================================================================== --- /dev/null +++ llvm/test/MC/ARM/bfloat16-t32-errors.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple thumbv8 -mattr=-bf16 < %s 2>&1 | FileCheck %s + +vdot.bf16 d3, d4, d5 +// CHECK: instruction requires: BFloat16 floating point extension +// CHECK-NEXT: vdot.bf16 d3, d4, d5 + +vdot.bf16 q0, q1, q2 +// CHECK: instruction requires: BFloat16 floating point extension +// CHECK-NEXT: vdot.bf16 q0, q1, q2 + +vdot.bf16 d3, d4, d5[1] +// CHECK: instruction requires: BFloat16 floating point extension +// CHECK-NEXT: vdot.bf16 d3, d4, d5[1] + +vdot.bf16 q0, q1, d5[1] +// CHECK: instruction requires: BFloat16 floating point extension +// CHECK-NEXT: vdot.bf16 q0, q1, d5[1] + +vmmla.bf16 q0, q1, q2 +// CHECK: instruction requires: BFloat16 floating point extension +// CHECK-NEXT: vmmla.bf16 q0, q1, q2 + +vcvt.bf16.f32 d1, q3 +// CHECK: instruction requires: BFloat16 floating point extension +// CHECK-NEXT: vcvt.bf16.f32 d1, q3 + +vcvtbeq.bf16.f32 s1, s3 +// CHECK: note: instruction requires: BFloat16 floating point extension +// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 +vcvttne.bf16.f32 s1, s3 +// CHECK: note: instruction requires: BFloat16 floating point extension 
+// CHECK-NEXT: vcvttne.bf16.f32 s1, s3 Index: llvm/test/MC/ARM/bfloat16-t32.s =================================================================== --- /dev/null +++ llvm/test/MC/ARM/bfloat16-t32.s @@ -0,0 +1,15 @@ +// RUN: llvm-mc -triple thumbv8 -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK +// RUN: llvm-mc -triple thumbv8 -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK + +vcvt.bf16.f32 d1, q3 +// CHECK: vcvt.bf16.f32 d1, q3 @ encoding: [0xb6,0xff,0x46,0x16] + +it eq +vcvtbeq.bf16.f32 s1, s3 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0x61,0x09] + +it ne +vcvttne.bf16.f32 s1, s3 +// CHECK: it ne @ encoding: [0x18,0xbf] +// CHECK: vcvttne.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0xe1,0x09] Index: llvm/test/MC/Disassembler/AArch64/armv8.6a-bf16.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/AArch64/armv8.6a-bf16.txt @@ -0,0 +1,74 @@ +# RUN: llvm-mc -triple=aarch64 -mattr=+bf16 -disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple=aarch64 -mattr=+v8.6a -disassemble < %s | FileCheck %s +# RUN: not llvm-mc -triple=aarch64 -mattr=-bf16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16 +# RUN: not llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16 + + +[0x62,0xfc,0x44,0x2e] +[0x62,0xfc,0x44,0x6e] +# CHECK: bfdot v2.2s, v3.4h, v4.4h +# CHECK: bfdot v2.4s, v3.8h, v4.8h +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xfc,0x44,0x2e] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xfc,0x44,0x6e] + +[0x62,0xf0,0x44,0x4f] +[0x62,0xf0,0x64,0x4f] +[0x62,0xf8,0x44,0x4f] +[0x62,0xf8,0x64,0x4f] +# CHECK: bfdot v2.4s, v3.8h, v4.2h[0] +# CHECK: bfdot v2.4s, v3.8h, v4.2h[1] +# CHECK: bfdot v2.4s, v3.8h, v4.2h[2] +# CHECK: bfdot v2.4s, v3.8h, v4.2h[3] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT:
[0x62,0xf0,0x44,0x4f] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xf0,0x64,0x4f] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xf8,0x44,0x4f] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xf8,0x64,0x4f] + + +[0x62,0xf0,0x44,0x0f] +[0x62,0xf0,0x64,0x0f] +[0x62,0xf8,0x44,0x0f] +[0x62,0xf8,0x64,0x0f] +# CHECK: bfdot v2.2s, v3.4h, v4.2h[0] +# CHECK: bfdot v2.2s, v3.4h, v4.2h[1] +# CHECK: bfdot v2.2s, v3.4h, v4.2h[2] +# CHECK: bfdot v2.2s, v3.4h, v4.2h[3] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xf0,0x44,0x0f] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xf0,0x64,0x0f] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xf8,0x44,0x0f] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xf8,0x64,0x0f] + + +[0x62,0xec,0x44,0x6e] +[0x83,0xec,0x45,0x6e] +# CHECK: bfmmla v2.4s, v3.8h, v4.8h +# CHECK: bfmmla v3.4s, v4.8h, v5.8h +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x62,0xec,0x44,0x6e] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x83,0xec,0x45,0x6e] + + +[0xa5,0x68,0xa1,0x0e] +[0xa5,0x68,0xa1,0x4e] +# CHECK: bfcvtn v5.4h, v5.4s +# CHECK: bfcvtn2 v5.8h, v5.4s +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0xa5,0x68,0xa1,0x0e] +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0xa5,0x68,0xa1,0x4e] + +[0x65, 0x40, 0x63, 0x1e] +# CHECK: bfcvt h5, s3 +# NOBF16: warning: invalid instruction encoding +# NOBF16-NEXT: [0x65, 0x40, 0x63, 0x1e] Index: llvm/test/MC/Disassembler/ARM/bfloat16-a32_1.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/ARM/bfloat16-a32_1.txt @@ -0,0 +1,102 @@ +# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s +#
RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16 +# RUN: llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16 +# +# Tests BFloat16 instruction decodings. +# Without BFloat16 enabled, some of these get disassembled to coprocessor instructions. +[0x25,0x3d,0x04,0xfe] +# CHECK: vdot.bf16 d3, d4, d5[1] +# NOBF16: cdp2 p13, #0, c3, c4, c5, #1 +# +[0x65,0x0d,0x02,0xfe] +# CHECK-NEXT: vdot.bf16 q0, q1, d5[1] +# NOBF16-NEXT: cdp2 p13, #0, c0, c2, c5, #3 +# +[0x61,0x09,0xf3,0x0e] +# CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 +# NOBF16-NEXT: cdpeq p9, #15, c0, c3, c1, #3 +# +[0xe1,0x09,0xf3,0x1e] +# CHECK-NEXT: vcvttne.bf16.f32 s1, s3 +# NOBF16-NEXT: cdpne p9, #15, c0, c3, c1, #7 +# +[0x50,0x08,0x30,0xfc] +# CHECK-NEXT: vfmat.bf16 q0, q0, q0 +# NOBF16-NEXT: ldc2 p8, c0, [r0], #-320 +# +[0x7e,0x08,0x30,0xfc] +# CHECK-NEXT: vfmat.bf16 q0, q0, q15 +# NOBF16-NEXT: ldc2 p8, c0, [r0], #-504 +# +[0xd0,0x08,0x3e,0xfc] +# CHECK-NEXT: vfmat.bf16 q0, q15, q0 +# NOBF16-NEXT: ldc2 p8, c0, [lr], #-832 +# +[0xfe,0x08,0x3e,0xfc] +# CHECK-NEXT: vfmat.bf16 q0, q15, q15 +# NOBF16-NEXT: ldc2 p8, c0, [lr], #-1016 +# +[0xd0,0x08,0x30,0xfc] +# CHECK-NEXT: vfmat.bf16 q0, q8, q0 +# NOBF16-NEXT: ldc2 p8, c0, [r0], #-832 +# +[0x50,0xe8,0x30,0xfc] +# CHECK-NEXT: vfmat.bf16 q7, q0, q0 +# NOBF16-NEXT: ldc2 p8, c14, [r0], #-320 +# +[0x50,0x08,0x70,0xfc] +# CHECK-NEXT: vfmat.bf16 q8, q0, q0 +# NOBF16-NEXT: ldc2l p8, c0, [r0], #-320 +# +[0x10,0x08,0x30,0xfc] +# CHECK-NEXT: vfmab.bf16 q0, q0, q0 +# NOBF16-NEXT: ldc2 p8, c0, [r0], #-64 +# +[0x3e,0x08,0x30,0xfc] +# CHECK-NEXT: vfmab.bf16 q0, q0, q15 +# NOBF16-NEXT: ldc2 p8, c0, [r0], #-248 +# +[0x90,0x08,0x3e,0xfc] +# CHECK-NEXT: vfmab.bf16 q0, q15, q0 +# NOBF16-NEXT: ldc2 p8, c0, [lr], #-576 +# +[0xbe,0x08,0x3e,0xfc] +# CHECK-NEXT: vfmab.bf16 q0, q15, q15 +# NOBF16-NEXT: ldc2 p8, c0, [lr], #-760 +# +[0x90,0x08,0x30,0xfc] +# CHECK-NEXT: vfmab.bf16 q0, q8, q0 +# 
NOBF16-NEXT: ldc2 p8, c0, [r0], #-576 +# +[0x10,0xe8,0x30,0xfc] +# CHECK-NEXT: vfmab.bf16 q7, q0, q0 +# NOBF16-NEXT: ldc2 p8, c14, [r0], #-64 +# +[0x10,0x08,0x70,0xfc] +# CHECK-NEXT: vfmab.bf16 q8, q0, q0 +# NOBF16-NEXT: ldc2l p8, c0, [r0], #-64 +# +[0x50,0x08,0x30,0xfe] +# CHECK-NEXT: vfmat.bf16 q0, q0, d0[0] +# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #2 +# +[0x78,0x08,0x30,0xfe] +# CHECK-NEXT: vfmat.bf16 q0, q0, d0[3] +# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #3 +[0x57,0x08,0x30,0xfe] +# +# CHECK-NEXT: vfmat.bf16 q0, q0, d7[0] +# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #2 +[0x10,0x08,0x30,0xfe] +# +# CHECK-NEXT: vfmab.bf16 q0, q0, d0[0] +# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #0 +[0x38,0x08,0x30,0xfe] +# +# CHECK-NEXT: vfmab.bf16 q0, q0, d0[3] +# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #1 +# +[0x17,0x08,0x30,0xfe] +# CHECK-NEXT: vfmab.bf16 q0, q0, d7[0] +# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #0 Index: llvm/test/MC/Disassembler/ARM/bfloat16-a32_2.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/ARM/bfloat16-a32_2.txt @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s +# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16 +# RUN: not llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16 + +[0x05,0x3d,0x04,0xfc] +# CHECK: vdot.bf16 d3, d4, d5 +# CHECK-NOBF16: warning: invalid instruction encoding + +[0x44,0x0d,0x02,0xfc] +# CHECK: vdot.bf16 q0, q1, q2 +# CHECK-NOBF16: warning: invalid instruction encoding + +[0x44,0x0c,0x02,0xfc] +# CHECK: vmmla.bf16 q0, q1, q2 +# CHECK-NOBF16: warning: invalid instruction encoding + +[0x46,0x16,0xb6,0xf3] +# CHECK: vcvt.bf16.f32 d1, q3 +# CHECK-NOBF16: warning: invalid instruction encoding Index:
llvm/test/MC/Disassembler/ARM/bfloat16-t32.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/ARM/bfloat16-t32.txt @@ -0,0 +1,25 @@ +# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s + +[0x04,0xfc,0x05,0x3d] +[0x02,0xfc,0x44,0x0d] +# CHECK: vdot.bf16 d3, d4, d5 +# CHECK: vdot.bf16 q0, q1, q2 + +[0x04,0xfe,0x25,0x3d] +# CHECK: vdot.bf16 d3, d4, d5[1] + +[0x02,0xfe,0x65,0x0d] +# CHECK: vdot.bf16 q0, q1, d5[1] + +[0x02,0xfc,0x44,0x0c] +# CHECK: vmmla.bf16 q0, q1, q2 + +[0xb6,0xff,0x46,0x16] +# CHECK: vcvt.bf16.f32 d1, q3 + +[0xf3,0xee,0x61,0x09] +# CHECK: vcvtb.bf16.f32 s1, s3 + +[0xf3,0xee,0xe1,0x09] +# CHECK: vcvtt.bf16.f32 s1, s3 Index: llvm/test/MC/Disassembler/ARM/bfloat16-t32_errors.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/ARM/bfloat16-t32_errors.txt @@ -0,0 +1,40 @@ +# RUN: not llvm-mc -triple thumbv8-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple thumbv8-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s + +[0x04,0xfc,0x05,0x3d] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x04,0xfc,0x05,0x3d] + +[0x02,0xfc,0x44,0x0d] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x02,0xfc,0x44,0x0d] + + +[0x04,0xfe,0x25,0x3d] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x04,0xfe,0x25,0x3d] + + +[0x02,0xfe,0x65,0x0d] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x02,0xfe,0x65,0x0d] + + +[0x02,0xfc,0x44,0x0c] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x02,0xfc,0x44,0x0c] + + +[0xb6,0xff,0x46,0x16] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0xb6,0xff,0x46,0x16] + + +[0xf3,0xee,0x61,0x09] +# CHECK: warning: invalid instruction 
encoding +# CHECK-NEXT: [0xf3,0xee,0x61,0x09] + + +[0xf3,0xee,0xe1,0x09] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0xf3,0xee,0xe1,0x09] Index: llvm/unittests/Support/TargetParserTest.cpp =================================================================== --- llvm/unittests/Support/TargetParserTest.cpp +++ llvm/unittests/Support/TargetParserTest.cpp @@ -26,9 +26,9 @@ "armv7e-m", "armv7em", "armv8-a", "armv8", "armv8a", "armv8l", "armv8.1-a", "armv8.1a", "armv8.2-a", "armv8.2a", "armv8.3-a", "armv8.3a", "armv8.4-a", "armv8.4a", "armv8.5-a", - "armv8.5a", "armv8-r", "armv8r", "armv8-m.base", "armv8m.base", - "armv8-m.main", "armv8m.main", "iwmmxt", "iwmmxt2", "xscale", - "armv8.1-m.main", + "armv8.5a", "armv8.6-a", "armv8.6a", "armv8-r", "armv8r", + "armv8-m.base", "armv8m.base", "armv8-m.main", "armv8m.main", "iwmmxt", + "iwmmxt2", "xscale", "armv8.1-m.main", }; bool testARMCPU(StringRef CPUName, StringRef ExpectedArch, @@ -411,6 +411,9 @@ testARMArch("armv8.5-a", "generic", "v8.5a", ARMBuildAttrs::CPUArch::v8_A)); EXPECT_TRUE( + testARMArch("armv8.6-a", "generic", "v8.6a", + ARMBuildAttrs::CPUArch::v8_A)); + EXPECT_TRUE( testARMArch("armv8-r", "cortex-r52", "v8r", ARMBuildAttrs::CPUArch::v8_R)); EXPECT_TRUE( @@ -678,7 +681,7 @@ "v7", "v7a", "v7ve", "v7hl", "v7l", "v7-r", "v7r", "v7-m", "v7m", "v7k", "v7s", "v7e-m", "v7em", "v8-a", "v8", "v8a", "v8l", "v8.1-a", "v8.1a", "v8.2-a", "v8.2a", "v8.3-a", "v8.3a", "v8.4-a", - "v8.4a", "v8.5-a","v8.5a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main" + "v8.4a", "v8.5-a","v8.5a", "v8.6-a", "v8.6a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main" }; for (unsigned i = 0; i < array_lengthof(Arch); i++) { @@ -743,6 +746,7 @@ case ARM::ArchKind::ARMV8_3A: case ARM::ArchKind::ARMV8_4A: case ARM::ArchKind::ARMV8_5A: + case ARM::ArchKind::ARMV8_6A: EXPECT_EQ(ARM::ProfileKind::A, ARM::parseArchProfile(ARMArch[i])); break; default: @@ -1002,6 +1006,8 @@ ARMBuildAttrs::CPUArch::v8_A)); 
EXPECT_TRUE(testAArch64Arch("armv8.5-a", "generic", "v8.5a", ARMBuildAttrs::CPUArch::v8_A)); + EXPECT_TRUE(testAArch64Arch("armv8.6-a", "generic", "v8.6a", + ARMBuildAttrs::CPUArch::v8_A)); } bool testAArch64Extension(StringRef CPUName, AArch64::ArchKind AK,