diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -54,7 +54,8 @@ // Decode AArch64 features from string like +[no]featureA+[no]featureB+... static bool DecodeAArch64Features(const Driver &D, StringRef text, - std::vector &Features) { + std::vector &Features, + llvm::AArch64::ArchKind ArchKind) { SmallVector Split; text.split(Split, StringRef("+"), -1, false); @@ -66,6 +67,10 @@ D.Diag(clang::diag::err_drv_no_neon_modifier); else return false; + + // +sve implies +f32mm if the base architecture is v8.6A + if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A) && Feature == "sve") + Features.push_back("+f32mm"); } return true; } @@ -76,6 +81,7 @@ std::vector &Features) { std::pair Split = Mcpu.split("+"); CPU = Split.first; + llvm::AArch64::ArchKind ArchKind = llvm::AArch64::ArchKind::ARMV8A; if (CPU == "native") CPU = llvm::sys::getHostCPUName(); @@ -83,7 +89,7 @@ if (CPU == "generic") { Features.push_back("+neon"); } else { - llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseCPUArch(CPU); + ArchKind = llvm::AArch64::parseCPUArch(CPU); if (!llvm::AArch64::getArchFeatures(ArchKind, Features)) return false; @@ -92,10 +98,11 @@ return false; } - if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)) - return false; + if (Split.second.size() && + !DecodeAArch64Features(D, Split.second, Features, ArchKind)) + return false; - return true; + return true; } static bool @@ -108,7 +115,8 @@ llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseArch(Split.first); if (ArchKind == llvm::AArch64::ArchKind::INVALID || !llvm::AArch64::getArchFeatures(ArchKind, Features) || - (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))) + (Split.second.size() && + !DecodeAArch64Features(D, Split.second, Features, ArchKind))) return false; return true; diff --git 
a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -636,6 +636,34 @@ // RUN: %clang -target aarch64 -march=armv8.5a+bf16+sve -### -c %s 2>&1 | FileCheck -check-prefixes=GENERICV85A-BF16-SVE %s // GENERICV85A-BF16-SVE: "-target-feature" "+bf16" "-target-feature" "+sve" +// The 8-bit integer matrix multiply extension is a mandatory component of the +// Armv8.6-A extensions, but is permitted as an optional feature for any +// implementation of Armv8.2-A to Armv8.5-A (inclusive). +// RUN: %clang -target aarch64 -march=armv8.5a -### -c %s 2>&1 | FileCheck -check-prefix=NO-I8MM %s +// RUN: %clang -target aarch64 -march=armv8.5a+i8mm -### -c %s 2>&1 | FileCheck -check-prefix=I8MM %s +// NO-I8MM-NOT: "-target-feature" "+i8mm" +// I8MM: "-target-feature" "+i8mm" + +// The 32-bit floating point matrix multiply extension is enabled by default +// for armv8.6-a targets (or later) with SVE, and can optionally be enabled for +// any target from armv8.2a onwards (we don't enforce not using it with earlier +// targets). +// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F32MM %s +// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s +// RUN: %clang -target aarch64 -march=armv8.5a+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s +// NO-F32MM-NOT: "-target-feature" "+f32mm" +// F32MM: "-target-feature" "+f32mm" + +// The 64-bit floating point matrix multiply extension is not currently enabled +// by default for any targets, because it requires an SVE vector length >= 256 +// bits. When we add a CPU which has that, then it can be enabled by default, +// but for now it can only be used by adding the +f64mm feature. 
+// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F64MM %s +// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=NO-F64MM %s +// RUN: %clang -target aarch64 -march=armv8.6a+f64mm -### -c %s 2>&1 | FileCheck -check-prefix=F64MM %s +// NO-F64MM-NOT: "-target-feature" "+f64mm" +// F64MM: "-target-feature" "+f64mm" + // fullfp16 is off by default for v8a, feature must not be mentioned // RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s // RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s diff --git a/clang/test/Driver/arm-matrix-multiply.c b/clang/test/Driver/arm-matrix-multiply.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/arm-matrix-multiply.c @@ -0,0 +1,14 @@ +// RUN: %clang -### -target arm-none-none-eabi -march=armv8.5a+i8mm %s 2>&1 | FileCheck %s +// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.5a+i8mm %s 2>&1 | FileCheck %s +// CHECK: "-target-feature" "+i8mm" +// CHECK-NOT: "-target-feature" "-i8mm" + +// RUN: %clang -### -target arm-none-none-eabi -march=armv8.6a+noi8mm %s 2>&1 | FileCheck %s --check-prefix=NOI8MM +// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.6a+noi8mm %s 2>&1 | FileCheck %s --check-prefix=NOI8MM +// NOI8MM: "-target-feature" "-i8mm" +// NOI8MM-NOT: "-target-feature" "+i8mm" + +// RUN: %clang -### -target arm-none-none-eabi %s 2>&1 | FileCheck %s --check-prefix=ABSENT +// RUN: %clang -### -target aarch64-none-none-eabi %s 2>&1 | FileCheck %s --check-prefix=ABSENT +// ABSENT-NOT: "-target-feature" "+i8mm" +// ABSENT-NOT: "-target-feature" "-i8mm" diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -24,7 
+24,7 @@ namespace AArch64 { // Arch extension modifiers for CPUs. -enum ArchExtKind : unsigned { +enum ArchExtKind : uint64_t { AEK_INVALID = 0, AEK_NONE = 1, AEK_CRC = 1 << 1, @@ -57,6 +57,8 @@ AEK_TME = 1 << 28, AEK_BF16 = 1 << 29, AEK_I8MM = 1 << 30, + AEK_F32MM = 1ULL << 31, + AEK_F64MM = 1ULL << 32, }; enum class ArchKind { diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -88,6 +88,8 @@ AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16") AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm") +AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm") +AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm") AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme") #undef AARCH64_ARCH_EXT_NAME diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h --- a/llvm/include/llvm/Support/ARMTargetParser.h +++ b/llvm/include/llvm/Support/ARMTargetParser.h @@ -47,14 +47,15 @@ AEK_FP_DP = 1 << 18, AEK_LOB = 1 << 19, AEK_BF16 = 1 << 20, - AEK_CDECP0 = 1 << 21, - AEK_CDECP1 = 1 << 22, - AEK_CDECP2 = 1 << 23, - AEK_CDECP3 = 1 << 24, - AEK_CDECP4 = 1 << 25, - AEK_CDECP5 = 1 << 26, - AEK_CDECP6 = 1 << 27, - AEK_CDECP7 = 1 << 28, + AEK_I8MM = 1 << 21, + AEK_CDECP0 = 1 << 22, + AEK_CDECP1 = 1 << 23, + AEK_CDECP2 = 1 << 24, + AEK_CDECP3 = 1 << 25, + AEK_CDECP4 = 1 << 26, + AEK_CDECP5 = 1 << 27, + AEK_CDECP6 = 1 << 28, + AEK_CDECP7 = 1 << 29, // Unsupported extensions. 
AEK_OS = 1ULL << 59, diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -116,7 +116,8 @@ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | - ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES)) + ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES | + ARM::AEK_I8MM)) ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R, FK_NEON_FP_ARMV8, (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | @@ -171,6 +172,7 @@ ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml") ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16", "-bf16") ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb") +ARM_ARCH_EXT_NAME("i8mm", ARM::AEK_I8MM, "+i8mm", "-i8mm") ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob") ARM_ARCH_EXT_NAME("cdecp0", ARM::AEK_CDECP0, "+cdecp0", "-cdecp0") ARM_ARCH_EXT_NAME("cdecp1", ARM::AEK_CDECP1, "+cdecp1", "-cdecp1") diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -636,6 +636,7 @@ {"maverick", "maverick", nullptr, nullptr}, {"xscale", "noxscale", nullptr, nullptr}, {"sb", "nosb", "+sb", "-sb"}, + {"i8mm", "noi8mm", "+i8mm", "-i8mm"}, {"mve", "nomve", "+mve", "-mve"}, {"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}}; @@ -1230,7 +1231,10 @@ {"tme", "notme", "+tme", "-tme"}, {"ssbs", "nossbs", "+ssbs", "-ssbs"}, {"sb", "nosb", "+sb", "-sb"}, - {"predres", "nopredres", "+predres", "-predres"} + {"predres", "nopredres", "+predres", "-predres"}, + {"i8mm", "noi8mm", "+i8mm", "-i8mm"}, + {"f32mm", "nof32mm", "+f32mm", "-f32mm"}, + {"f64mm", 
"nof64mm", "+f64mm", "-f64mm"}, }; for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {