Index: include/llvm/Support/AArch64TargetParser.h =================================================================== --- include/llvm/Support/AArch64TargetParser.h +++ include/llvm/Support/AArch64TargetParser.h @@ -49,6 +49,11 @@ AEK_SSBS = 1 << 20, AEK_SB = 1 << 21, AEK_PREDRES = 1 << 22, + AEK_SVE2 = 1 << 23, + AEK_SVE2AES = 1 << 24, + AEK_SVE2SM4 = 1 << 25, + AEK_SVE2SHA3 = 1 << 26, + AEK_BITPERM = 1 << 27, }; enum class ArchKind { Index: include/llvm/Support/AArch64TargetParser.def =================================================================== --- include/llvm/Support/AArch64TargetParser.def +++ include/llvm/Support/AArch64TargetParser.def @@ -50,30 +50,35 @@ #define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) #endif // FIXME: This would be nicer were it tablegen -AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") -AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") -AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") -AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") -AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") -AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") -AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") -AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") -AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod") -AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") -AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") -AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") -AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") -AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") -AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") -AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") -AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") -AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") -AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") -AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") -AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") -AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") +AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") +AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") +AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") +AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") +AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") +AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") +AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") +AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod") +AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") +AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") +AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") +AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") +AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") +AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") +AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") +AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") +AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") +AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") +AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") +AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm") +AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") +AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") +AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") +AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") +AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") +AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME Index: include/llvm/Support/ARMTargetParser.h =================================================================== --- include/llvm/Support/ARMTargetParser.h +++ include/llvm/Support/ARMTargetParser.h @@ -45,6 +45,11 @@ AEK_AES = 1 << 16, AEK_FP16FML = 1 << 17, AEK_SB = 1 << 18, + AEK_SVE2 = 1 << 19, + AEK_SVE2AES = 1 << 20, + AEK_SVE2SM4 = 1 << 21, + AEK_SVE2SHA3 = 1 << 22, + AEK_BITPERM = 1 << 23, // Unsupported extensions. AEK_OS = 0x8000000, AEK_IWMMXT = 0x10000000, Index: lib/Support/AArch64TargetParser.cpp =================================================================== --- lib/Support/AArch64TargetParser.cpp +++ lib/Support/AArch64TargetParser.cpp @@ -88,6 +88,16 @@ Features.push_back("+rdm"); if (Extensions & AEK_SVE) Features.push_back("+sve"); + if (Extensions & AEK_SVE2) + Features.push_back("+sve2"); + if (Extensions & AEK_SVE2AES) + Features.push_back("+sve2-aes"); + if (Extensions & AEK_SVE2SM4) + Features.push_back("+sve2-sm4"); + if (Extensions & AEK_SVE2SHA3) + Features.push_back("+sve2-sha3"); + if (Extensions & AEK_BITPERM) + Features.push_back("+bitperm"); if (Extensions & AEK_RCPC) Features.push_back("+rcpc"); Index: lib/Target/AArch64/AArch64.td =================================================================== --- lib/Target/AArch64/AArch64.td +++ lib/Target/AArch64/AArch64.td @@ -103,6 +103,21 @@ def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", "Enable Scalable Vector Extension (SVE) instructions">; +def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", + "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>; + +def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true", + "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>; + +def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", + "Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>; + +def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", + "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; + +def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true", + "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; + def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", @@ -395,6 +410,18 @@ //===----------------------------------------------------------------------===// // AArch64 Processors supported. // + +//===----------------------------------------------------------------------===// +// Unsupported features to disable for scheduling models +//===----------------------------------------------------------------------===// + +class AArch64Unsupported { list F; } + +def SVEUnsupported : AArch64Unsupported { + let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, + HasSVE2BitPerm]; +} + include "AArch64SchedA53.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -107,6 +107,16 @@ "fuse-aes">; def HasSVE : Predicate<"Subtarget->hasSVE()">, AssemblerPredicate<"FeatureSVE", "sve">; +def HasSVE2 : Predicate<"Subtarget->hasSVE2()">, + AssemblerPredicate<"FeatureSVE2", "sve2">; +def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">, + AssemblerPredicate<"FeatureSVE2AES", "sve2-aes">; +def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, + AssemblerPredicate<"FeatureSVE2SM4", "sve2-sm4">; +def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, + AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">; +def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2()">, + AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicate<"FeatureRCPC", "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, Index: lib/Target/AArch64/AArch64SchedA53.td =================================================================== --- lib/Target/AArch64/AArch64SchedA53.td +++ lib/Target/AArch64/AArch64SchedA53.td @@ -26,7 +26,7 @@ // v 1.0 Spreadsheet let CompleteModel = 1; - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; } Index: lib/Target/AArch64/AArch64SchedA57.td =================================================================== --- lib/Target/AArch64/AArch64SchedA57.td +++ lib/Target/AArch64/AArch64SchedA57.td @@ -31,7 +31,7 @@ let LoopMicroOpBufferSize = 16; let CompleteModel = 1; - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; } //===----------------------------------------------------------------------===// Index: lib/Target/AArch64/AArch64SchedCyclone.td =================================================================== --- lib/Target/AArch64/AArch64SchedCyclone.td +++ lib/Target/AArch64/AArch64SchedCyclone.td @@ -18,7 +18,7 @@ let MispredictPenalty = 16; // 14-19 cycles are typical. let CompleteModel = 1; - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; } //===----------------------------------------------------------------------===// Index: lib/Target/AArch64/AArch64SchedExynosM1.td =================================================================== --- lib/Target/AArch64/AArch64SchedExynosM1.td +++ lib/Target/AArch64/AArch64SchedExynosM1.td @@ -24,7 +24,7 @@ let MispredictPenalty = 14; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; } //===----------------------------------------------------------------------===// Index: lib/Target/AArch64/AArch64SchedExynosM3.td =================================================================== --- lib/Target/AArch64/AArch64SchedExynosM3.td +++ lib/Target/AArch64/AArch64SchedExynosM3.td @@ -24,7 +24,7 @@ let MispredictPenalty = 16; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; } //===----------------------------------------------------------------------===// Index: lib/Target/AArch64/AArch64SchedExynosM4.td =================================================================== --- lib/Target/AArch64/AArch64SchedExynosM4.td +++ lib/Target/AArch64/AArch64SchedExynosM4.td @@ -24,7 +24,7 @@ let MispredictPenalty = 16; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; } //===----------------------------------------------------------------------===// Index: lib/Target/AArch64/AArch64SchedFalkor.td =================================================================== --- lib/Target/AArch64/AArch64SchedFalkor.td +++ lib/Target/AArch64/AArch64SchedFalkor.td @@ -23,7 +23,7 @@ let MispredictPenalty = 11; // Minimum branch misprediction penalty. let CompleteModel = 1; - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; Index: lib/Target/AArch64/AArch64SchedKryo.td =================================================================== --- lib/Target/AArch64/AArch64SchedKryo.td +++ lib/Target/AArch64/AArch64SchedKryo.td @@ -27,7 +27,7 @@ let LoopMicroOpBufferSize = 16; let CompleteModel = 1; - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; Index: lib/Target/AArch64/AArch64SchedThunderX.td =================================================================== --- lib/Target/AArch64/AArch64SchedThunderX.td +++ lib/Target/AArch64/AArch64SchedThunderX.td @@ -25,7 +25,7 @@ let PostRAScheduler = 1; // Use PostRA scheduler. let CompleteModel = 1; - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; Index: lib/Target/AArch64/AArch64SchedThunderX2T99.td =================================================================== --- lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -25,7 +25,7 @@ let PostRAScheduler = 1; // Using PostRA sched. let CompleteModel = 1; - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = SVEUnsupported.F; // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -119,6 +119,7 @@ bool HasLSLFast = false; bool HasSVE = false; + bool HasSVE2 = false; bool HasRCPC = false; bool HasAggressiveFMA = false; @@ -134,6 +135,12 @@ bool HasRandGen = false; bool HasMTE = false; + // Arm SVE2 extensions + bool HasSVE2AES = true; + bool HasSVE2SM4 = true; + bool HasSVE2SHA3 = true; + bool HasSVE2BitPerm = true; + // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove = false; @@ -360,6 +367,7 @@ bool hasSPE() const { return HasSPE; } bool hasLSLFast() const { return HasLSLFast; } bool hasSVE() const { return HasSVE; } + bool hasSVE2() const { return HasSVE2; } bool hasRCPC() const { return HasRCPC; } bool hasAggressiveFMA() const { return HasAggressiveFMA; } bool hasAlternativeNZCV() const { return HasAlternativeNZCV; } @@ -372,6 +380,11 @@ bool hasBTI() const { return HasBTI; } bool hasRandGen() const { return HasRandGen; } bool hasMTE() const { return HasMTE; } + // Arm SVE2 extensions + bool hasSVE2AES() const { return HasSVE2AES; } + bool hasSVE2SM4() const { return HasSVE2SM4; } + bool hasSVE2SHA3() const { return HasSVE2SHA3; } + bool hasSVE2BitPerm() const { return HasSVE2BitPerm; } bool isLittleEndian() const { return IsLittle; } Index: lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp =================================================================== --- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2831,6 +2831,11 @@ {"pan-rwv", {AArch64::FeaturePAN_RWV}}, {"ccpp", {AArch64::FeatureCCPP}}, {"sve", {AArch64::FeatureSVE}}, + {"sve2", {AArch64::FeatureSVE2}}, + {"sve2-aes", {AArch64::FeatureSVE2AES}}, + {"sve2-sm4", {AArch64::FeatureSVE2SM4}}, + {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, + {"bitperm", {AArch64::FeatureSVE2BitPerm}}, // FIXME: Unsupported extensions {"pan", {}}, {"lor", {}}, Index: unittests/Support/TargetParserTest.cpp =================================================================== --- unittests/Support/TargetParserTest.cpp +++ unittests/Support/TargetParserTest.cpp @@ -1014,8 +1014,8 @@ AArch64::AEK_FP16 | AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM | AArch64::AEK_SVE | - AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | - AArch64::AEK_FP16FML; + AArch64::AEK_SVE2 | AArch64::AEK_DOTPROD | + AArch64::AEK_RCPC | AArch64::AEK_FP16FML; for (unsigned i = 0; i <= Extensions; i++) EXPECT_TRUE(i == 0 ? !AArch64::getExtensionFeatures(i, Features) @@ -1043,6 +1043,14 @@ {"lse", "nolse", "+lse", "-lse"}, {"rdm", "nordm", "+rdm", "-rdm"}, {"sve", "nosve", "+sve", "-sve"}, + {"sve2", "nosve2", "+sve2", "-sve2"}, + {"sve2-aes", "nosve2-aes", "+sve2-aes", + "-sve2-aes"}, + {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", + "-sve2-sm4"}, + {"sve2-sha3", "nosve2-sha3", "+sve2-sha3", + "-sve2-sha3"}, + {"bitperm", "nobitperm", "+bitperm", "-bitperm"}, {"dotprod", "nodotprod", "+dotprod", "-dotprod"}, {"rcpc", "norcpc", "+rcpc", "-rcpc" }, {"rng", "norng", "+rand", "-rand"},