diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -154,6 +154,10 @@ def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", "Has zero-cycle zeroing instructions for generic registers">; +// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0", +// as movi is more efficient across all cores. Newer cores can eliminate +// fmovs early and there is no difference with movi, but this is not true for +// all implementations. def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false", "Has no zero-cycle zeroing instructions for FP registers">; @@ -168,7 +172,7 @@ "The zero-cycle floating-point zeroing instruction has a bug">; def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", + "RequiresStrictAlign", "true", "Disallow all unaligned memory " "access">; @@ -190,11 +194,11 @@ "Prefer likely predicted branches over selects">; def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", - "CustomAsCheapAsMove", "true", + "HasCustomCheapAsMoveHandling", "true", "Use custom handling of cheap instructions">; def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", - "ExynosAsCheapAsMove", "true", + "HasExynosCheapAsMoveHandling", "true", "Use Exynos specific handling of cheap instructions", [FeatureCustomCheapAsMoveHandling]>; @@ -202,12 +206,12 @@ "UsePostRAScheduler", "true", "Schedule again after register allocation">; def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", - "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; + "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">; def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", - "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; + "IsPaired128Slow", "true", 
"Paired 128 bit loads and stores are slow">; -def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow", +def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", "true", "STR of Q register with register offset is slow">; def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< @@ -818,8 +822,8 @@ FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; -def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", - "Samsung Exynos-M3 processors", +def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M4 processors", [FeatureArithmeticBccFusion, FeatureArithmeticCbzFusion, FeatureExynosCheapAsMoveHandling, diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -87,191 +87,14 @@ /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. 
ARMProcFamilyEnum ARMProcFamily = Others; - bool HasV8_0aOps = false; - bool HasV8_1aOps = false; - bool HasV8_2aOps = false; - bool HasV8_3aOps = false; - bool HasV8_4aOps = false; - bool HasV8_5aOps = false; - bool HasV8_6aOps = false; - bool HasV8_7aOps = false; - bool HasV8_8aOps = false; - bool HasV9_0aOps = false; - bool HasV9_1aOps = false; - bool HasV9_2aOps = false; - bool HasV9_3aOps = false; - bool HasV8_0rOps = false; - - bool HasCONTEXTIDREL2 = false; - bool HasEL2VMSA = false; - bool HasEL3 = false; - bool HasFPARMv8 = false; - bool HasNEON = false; - bool HasCrypto = false; - bool HasDotProd = false; - bool HasCRC = false; - bool HasLSE = false; - bool HasLSE2 = false; - bool HasRAS = false; - bool HasRDM = false; - bool HasPerfMon = false; - bool HasFullFP16 = false; - bool HasFP16FML = false; - bool HasSPE = false; - - bool FixCortexA53_835769 = false; - - // ARMv8.1 extensions - bool HasVH = false; - bool HasPAN = false; - bool HasLOR = false; - - // ARMv8.2 extensions - bool HasPsUAO = false; - bool HasPAN_RWV = false; - bool HasCCPP = false; - - // SVE extensions - bool HasSVE = false; - bool UseExperimentalZeroingPseudos = false; - bool UseScalarIncVL = false; - - // Armv8.2 Crypto extensions - bool HasSM4 = false; - bool HasSHA3 = false; - bool HasSHA2 = false; - bool HasAES = false; - - // ARMv8.3 extensions - bool HasPAuth = false; - bool HasJS = false; - bool HasCCIDX = false; - bool HasComplxNum = false; - - // ARMv8.4 extensions - bool HasNV = false; - bool HasMPAM = false; - bool HasDIT = false; - bool HasTRACEV8_4 = false; - bool HasAM = false; - bool HasSEL2 = false; - bool HasTLB_RMI = false; - bool HasFlagM = false; - bool HasRCPC_IMMO = false; - - bool HasLSLFast = false; - bool HasRCPC = false; - bool HasAggressiveFMA = false; - - // Armv8.5-A Extensions - bool HasAlternativeNZCV = false; - bool HasFRInt3264 = false; - bool HasSpecRestrict = false; - bool HasSSBS = false; - bool HasSB = false; - bool HasPredRes = false; - bool 
HasCCDP = false; - bool HasBTI = false; - bool HasRandGen = false; - bool HasMTE = false; - bool HasTME = false; - - // Armv8.6-A Extensions - bool HasBF16 = false; - bool HasMatMulInt8 = false; - bool HasMatMulFP32 = false; - bool HasMatMulFP64 = false; - bool HasAMVS = false; - bool HasFineGrainedTraps = false; - bool HasEnhancedCounterVirtualization = false; - - // Armv8.7-A Extensions - bool HasXS = false; - bool HasWFxT = false; - bool HasHCX = false; - bool HasLS64 = false; - - // Armv8.8-A Extensions - bool HasHBC = false; - bool HasMOPS = false; - - // Arm SVE2 extensions - bool HasSVE2 = false; - bool HasSVE2AES = false; - bool HasSVE2SM4 = false; - bool HasSVE2SHA3 = false; - bool HasSVE2BitPerm = false; - - // Armv9-A Extensions - bool HasRME = false; - - // Arm Scalable Matrix Extension (SME) - bool HasSME = false; - bool HasSMEF64 = false; - bool HasSMEI64 = false; - bool HasStreamingSVE = false; - - // AppleA7 system register. - bool HasAppleA7SysReg = false; - - // Future architecture extensions. - bool HasETE = false; - bool HasTRBE = false; - bool HasBRBE = false; - bool HasSPE_EEF = false; - - // HasZeroCycleRegMove - Has zero-cycle register mov instructions. - bool HasZeroCycleRegMove = false; - - // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. - bool HasZeroCycleZeroing = false; - bool HasZeroCycleZeroingGP = false; - bool HasZeroCycleZeroingFPWorkaround = false; - - // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0". - // as movi is more efficient across all cores. Newer cores can eliminate - // fmovs early and there is no difference with movi, but this not true for - // all implementations. - bool HasZeroCycleZeroingFP = true; - - // StrictAlign - Disallow unaligned memory accesses. - bool StrictAlign = false; - - // NegativeImmediates - transform instructions with negative immediates - bool NegativeImmediates = true; - // Enable 64-bit vectorization in SLP. 
unsigned MinVectorRegisterBitWidth = 64; - bool OutlineAtomics = false; - bool PredictableSelectIsExpensive = false; - bool BalanceFPOps = false; - bool CustomAsCheapAsMove = false; - bool ExynosAsCheapAsMove = false; - bool UsePostRAScheduler = false; - bool Misaligned128StoreIsSlow = false; - bool Paired128IsSlow = false; - bool STRQroIsSlow = false; - bool UseAlternateSExtLoadCVTF32Pattern = false; - bool HasArithmeticBccFusion = false; - bool HasArithmeticCbzFusion = false; - bool HasCmpBccFusion = false; - bool HasFuseAddress = false; - bool HasFuseAES = false; - bool HasFuseArithmeticLogic = false; - bool HasFuseCCSelect = false; - bool HasFuseCryptoEOR = false; - bool HasFuseLiterals = false; - bool DisableLatencySchedHeuristic = false; - bool UseRSqrt = false; - bool Force32BitJumpTables = false; - bool UseEL1ForTP = false; - bool UseEL2ForTP = false; - bool UseEL3ForTP = false; - bool AllowTaggedGlobals = false; - bool HardenSlsRetBr = false; - bool HardenSlsBlr = false; - bool HardenSlsNoComdat = false; +// Bool members corresponding to the SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(NAME, ATTRIBUTE, DEFAULT, GETTER) \ + bool ATTRIBUTE = DEFAULT; +#include "AArch64GenSubtargetInfo.inc" + uint8_t MaxInterleaveFactor = 2; uint8_t VectorInsertExtractBaseCost = 3; uint16_t CacheLineSize = 0; @@ -331,6 +154,11 @@ unsigned MinSVEVectorSizeInBitsOverride = 0, unsigned MaxSVEVectorSizeInBitsOverride = 0); +// Getters for SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(NAME, ATTRIBUTE, DEFAULT, GETTER) \ + bool GETTER() const { return ATTRIBUTE; } +#include "AArch64GenSubtargetInfo.inc" + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } @@ -351,9 +179,7 @@ const RegisterBankInfo *getRegBankInfo() const override; const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } - bool enablePostRAScheduler() const 
override { - return UsePostRAScheduler; - } + bool enablePostRAScheduler() const override { return usePostRAScheduler(); } /// Returns ARM processor family. /// Avoid this function! CPU specifics should be kept local to this class @@ -363,30 +189,6 @@ return ARMProcFamily; } - bool hasV8_0aOps() const { return HasV8_0aOps; } - bool hasV8_1aOps() const { return HasV8_1aOps; } - bool hasV8_2aOps() const { return HasV8_2aOps; } - bool hasV8_3aOps() const { return HasV8_3aOps; } - bool hasV8_4aOps() const { return HasV8_4aOps; } - bool hasV8_5aOps() const { return HasV8_5aOps; } - bool hasV9_0aOps() const { return HasV9_0aOps; } - bool hasV9_1aOps() const { return HasV9_1aOps; } - bool hasV9_2aOps() const { return HasV9_2aOps; } - bool hasV9_3aOps() const { return HasV9_3aOps; } - bool hasV8_0rOps() const { return HasV8_0rOps; } - - bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } - - bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; } - - bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; } - - bool hasZeroCycleZeroingFPWorkaround() const { - return HasZeroCycleZeroingFPWorkaround; - } - - bool requiresStrictAlign() const { return StrictAlign; } - bool isXRaySupported() const override { return true; } unsigned getMinVectorRegisterBitWidth() const { @@ -399,41 +201,6 @@ return CustomCallSavedXRegs[i]; } bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } - bool hasFPARMv8() const { return HasFPARMv8; } - bool hasNEON() const { return HasNEON; } - bool hasCrypto() const { return HasCrypto; } - bool hasDotProd() const { return HasDotProd; } - bool hasCRC() const { return HasCRC; } - bool hasLSE() const { return HasLSE; } - bool hasLSE2() const { return HasLSE2; } - bool hasRAS() const { return HasRAS; } - bool hasRDM() const { return HasRDM; } - bool hasSM4() const { return HasSM4; } - bool hasSHA3() const { return HasSHA3; } - bool hasSHA2() const { return HasSHA2; } - bool hasAES() const { return 
HasAES; } - bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; } - bool balanceFPOps() const { return BalanceFPOps; } - bool predictableSelectIsExpensive() const { - return PredictableSelectIsExpensive; - } - bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } - bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; } - bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } - bool isPaired128Slow() const { return Paired128IsSlow; } - bool isSTRQroSlow() const { return STRQroIsSlow; } - bool useAlternateSExtLoadCVTF32Pattern() const { - return UseAlternateSExtLoadCVTF32Pattern; - } - bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } - bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } - bool hasCmpBccFusion() const { return HasCmpBccFusion; } - bool hasFuseAddress() const { return HasFuseAddress; } - bool hasFuseAES() const { return HasFuseAES; } - bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } - bool hasFuseCCSelect() const { return HasFuseCCSelect; } - bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; } - bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. 
bool hasFusion() const { @@ -442,16 +209,6 @@ hasFuseCCSelect() || hasFuseLiterals(); } - bool hardenSlsRetBr() const { return HardenSlsRetBr; } - bool hardenSlsBlr() const { return HardenSlsBlr; } - bool hardenSlsNoComdat() const { return HardenSlsNoComdat; } - - bool useEL1ForTP() const { return UseEL1ForTP; } - bool useEL2ForTP() const { return UseEL2ForTP; } - bool useEL3ForTP() const { return UseEL3ForTP; } - - bool useRSqrt() const { return UseRSqrt; } - bool force32BitJumpTables() const { return Force32BitJumpTables; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } unsigned getVectorInsertExtractBaseCost() const { return VectorInsertExtractBaseCost; @@ -480,58 +237,10 @@ unsigned getWideningBaseCost() const { return WideningBaseCost; } - bool useExperimentalZeroingPseudos() const { - return UseExperimentalZeroingPseudos; - } - - bool useScalarIncVL() const { return UseScalarIncVL; } - /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. 
bool supportsAddressTopByteIgnored() const; - bool hasPerfMon() const { return HasPerfMon; } - bool hasFullFP16() const { return HasFullFP16; } - bool hasFP16FML() const { return HasFP16FML; } - bool hasSPE() const { return HasSPE; } - bool hasLSLFast() const { return HasLSLFast; } - bool hasSVE() const { return HasSVE; } - bool hasSVE2() const { return HasSVE2; } - bool hasRCPC() const { return HasRCPC; } - bool hasAggressiveFMA() const { return HasAggressiveFMA; } - bool hasAlternativeNZCV() const { return HasAlternativeNZCV; } - bool hasFRInt3264() const { return HasFRInt3264; } - bool hasSpecRestrict() const { return HasSpecRestrict; } - bool hasSSBS() const { return HasSSBS; } - bool hasSB() const { return HasSB; } - bool hasPredRes() const { return HasPredRes; } - bool hasCCDP() const { return HasCCDP; } - bool hasBTI() const { return HasBTI; } - bool hasRandGen() const { return HasRandGen; } - bool hasMTE() const { return HasMTE; } - bool hasTME() const { return HasTME; } - // Arm SVE2 extensions - bool hasSVE2AES() const { return HasSVE2AES; } - bool hasSVE2SM4() const { return HasSVE2SM4; } - bool hasSVE2SHA3() const { return HasSVE2SHA3; } - bool hasSVE2BitPerm() const { return HasSVE2BitPerm; } - bool hasMatMulInt8() const { return HasMatMulInt8; } - bool hasMatMulFP32() const { return HasMatMulFP32; } - bool hasMatMulFP64() const { return HasMatMulFP64; } - - // Armv8.6-A Extensions - bool hasBF16() const { return HasBF16; } - bool hasFineGrainedTraps() const { return HasFineGrainedTraps; } - bool hasEnhancedCounterVirtualization() const { - return HasEnhancedCounterVirtualization; - } - - // Arm Scalable Matrix Extension (SME) - bool hasSME() const { return HasSME; } - bool hasSMEF64() const { return HasSMEF64; } - bool hasSMEI64() const { return HasSMEI64; } - bool hasStreamingSVE() const { return HasStreamingSVE; } - bool isLittleEndian() const { return IsLittle; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } @@ -552,42 +261,6 @@ 
bool useAA() const override; - bool outlineAtomics() const { return OutlineAtomics; } - - bool hasVH() const { return HasVH; } - bool hasPAN() const { return HasPAN; } - bool hasLOR() const { return HasLOR; } - - bool hasPsUAO() const { return HasPsUAO; } - bool hasPAN_RWV() const { return HasPAN_RWV; } - bool hasCCPP() const { return HasCCPP; } - - bool hasPAuth() const { return HasPAuth; } - bool hasJS() const { return HasJS; } - bool hasCCIDX() const { return HasCCIDX; } - bool hasComplxNum() const { return HasComplxNum; } - - bool hasNV() const { return HasNV; } - bool hasMPAM() const { return HasMPAM; } - bool hasDIT() const { return HasDIT; } - bool hasTRACEV8_4() const { return HasTRACEV8_4; } - bool hasAM() const { return HasAM; } - bool hasAMVS() const { return HasAMVS; } - bool hasXS() const { return HasXS; } - bool hasWFxT() const { return HasWFxT; } - bool hasHCX() const { return HasHCX; } - bool hasLS64() const { return HasLS64; } - bool hasSEL2() const { return HasSEL2; } - bool hasTLB_RMI() const { return HasTLB_RMI; } - bool hasFlagM() const { return HasFlagM; } - bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } - bool hasEL2VMSA() const { return HasEL2VMSA; } - bool hasEL3() const { return HasEL3; } - bool hasHBC() const { return HasHBC; } - bool hasMOPS() const { return HasMOPS; } - - bool fixCortexA53_835769() const { return FixCortexA53_835769; } - bool addrSinkUsingGEPs() const override { // Keeping GEPs inbounds is important for exploiting AArch64 // addressing-modes in ILP32 mode. diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -19,9 +19,11 @@ // ARM Subtarget state. // -def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", +// True if compiling for Thumb, false for ARM +def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb", "true", "Thumb mode">; +// True if we're using software floating point features. 
def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", "true", "Use software floating " "point features.">; @@ -48,14 +50,18 @@ "Enable 64-bit FP registers", [FeatureFPRegs]>; +// If true, the floating point unit supports double precision. def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true", "Floating point unit supports " "double precision", [FeatureFPRegs64]>; +// True if subtarget has the full 32 double precision FP registers for VFPv3. def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", "Extend FP to 32 double registers">; +/// Versions of the VFP flags restricted to single precision, or to +/// 16 d-registers, or both. multiclass VFPver prev, list otherimplies, @@ -100,6 +106,7 @@ "Enable NEON instructions", [FeatureVFP3]>; +// True if subtarget supports half-precision FP conversions def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision " "floating point">; @@ -110,169 +117,212 @@ defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", [FeatureVFP4], []>; +// True if subtarget supports half-precision FP operations def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Enable full half-precision " "floating point", [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>; +// True if subtarget supports half-precision FP fml operations def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", "Enable full half-precision " "floating point fml instructions", [FeatureFullFP16]>; +// True if subtarget supports [su]div in Thumb mode def FeatureHWDivThumb : SubtargetFeature<"hwdiv", - "HasHardwareDivideInThumb", "true", + "HasDivideInThumbMode", "true", "Enable divide instructions in Thumb">; +// True if subtarget supports [su]div in ARM mode def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasHardwareDivideInARM", "true", + "HasDivideInARMMode", "true", "Enable divide instructions in ARM mode">; // Atomic Support + +// True if the subtarget supports DMB / DSB data 
barrier instructions. def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", "Has data barrier (dmb/dsb) instructions">; +// True if the subtarget supports CLREX instructions def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", "Has v7 clrex instruction">; +// True if the subtarget supports DFB data barrier instruction. def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", "Has full data barrier (dfb) instruction">; +// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions def FeatureAcquireRelease : SubtargetFeature<"acquire-release", "HasAcquireRelease", "true", "Has v8 acquire/release (lda/ldaex " " etc) instructions">; -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", +// True if floating point compare + branch is slow. +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true", "FP compare + branch is slow">; +// If true, the processor supports the Performance Monitor Extensions. These +// include a generic cycle-counter as well as more fine-grained (often +// implementation-specific) events. 
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable support for Performance " "Monitor extensions">; // TrustZone Security Extensions + +// If true, processor supports TrustZone security extensions def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", "Enable support for TrustZone " "security extensions">; +// If true, processor supports ARMv8-M Security Extensions def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", "Enable support for ARMv8-M " "Security Extensions">; +// If true, processor supports SHA1 and SHA256 def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", "Enable SHA1 and SHA256 support", [FeatureNEON]>; +// If true, processor supports AES def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES support", [FeatureNEON]>; +// If true, processor supports Cryptography extensions def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", "Enable support for " "Cryptography extensions", [FeatureNEON, FeatureSHA2, FeatureAES]>; +// If true, processor supports CRC instructions def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +// HasDotProd - True if the ARMv8.2A dot product instructions are supported. def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", "Enable support for dot product instructions", [FeatureNEON]>; +// If true, the processor supports RAS extensions. // Not to be confused with FeatureHasRetAddrStack (return address stack) def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable Reliability, Availability " "and Serviceability extensions">; // Fast computation of non-negative address offsets +// If true, processor does positive address offset computation faster. 
def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", "Enable fast computation of " "positive address offsets">; // Fast execution of AES crypto operations +// If true, processor executes back to back AES instruction pairs faster. def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", "CPU fuses AES crypto operations">; // Fast execution of bottom and top halves of literal generation +// If true, processor executes back to back bottom and top halves of literal generation faster def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", "CPU fuses literal generation operations">; -// The way of reading thread pointer -def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", +// The way of reading thread pointer +// If true, read thread pointer from coprocessor register. +def FeatureReadTp : SubtargetFeature<"read-tp-hard", "IsReadTPHard", "true", "Reading thread pointer from register">; // Cyclone can zero VFP registers in 0 cycles. +// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are +// particularly effective at zeroing a VFP register. def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; // Whether it is profitable to unpredicate certain instructions during if-conversion +// If true, if conversion may decide to leave some instructions unpredicated. def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", "IsProfitableToUnpredicate", "true", "Is profitable to unpredicate">; // Some targets (e.g. Swift) have microcoded VGETLNi32. +// If true, VMOV will be favored over VGETLNi32. def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", "HasSlowVGETLNi32", "true", "Has slow VGETLNi32 - prefer VMOV">; // Some targets (e.g. Swift) have microcoded VDUP32. +// If true, VMOV will be favored over VDUP. 
def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true", "Has slow VDUP32 - prefer VMOV">; // Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON // for scalar FP, as this allows more effective execution domain optimization. +// If true, VMOVSR will be favored over VMOVDRR. def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", "true", "Prefer VMOVSR">; // Swift has ISHST barriers compatible with Atomic Release semantics but weaker // than ISH -def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", +// If true, ISHST barriers will be used for Release semantics. +def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers", "true", "Prefer ISHST barriers">; // Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. +// If true, the AGU and NEON/FPU units are multiplexed. def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true", "Has muxed AGU and NEON/FPU">; // Whether VLDM/VSTM starting with odd register number need more microops // than single VLDRS -def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", +// If true, a VLDM/VSTM starting with an odd register number is considered to +// take more microops than single VLDRS/VSTRS. +def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister", "true", "VLDM/VSTM starting " "with an odd register is slow">; // Some targets have a renaming dependency when loading into D subregisters. +// If true, loading into a D subregister will be penalized. def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", - "SlowLoadDSubregister", "true", + "HasSlowLoadDSubregister", "true", "Loading into D subregs is slow">; +// If true, use a wider stride when allocating VFP registers. 
def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp", "UseWideStrideVFP", "true", "Use a wide stride when allocating VFP registers">; // Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. +// If true, VMOVS will never be widened to VMOVD. def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", "DontWidenVMOVS", "true", "Don't widen VMOVS to VMOVD">; // Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different // VFP register widths. +// If true, splat a register between VFP and NEON instructions. def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", - "SplatVFPToNeon", "true", + "UseSplatVFPToNeon", "true", "Splat register from VFP to NEON", [FeatureDontWidenVMOVS]>; // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. +// If true, run the MLx expansion pass. def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true", "Expand VFP/NEON MLA/MLS instructions">; // Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. +// If true, VFP/NEON VMLA/VMLS have special RAW hazards. def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", "true", "Has VMLx hazards">; // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from // VFP to NEON, as an execution domain optimization. +// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs", "true", "Convert VMOVSR, VMOVRS, " @@ -281,18 +331,21 @@ // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. This affects instruction selection and should // only be enabled if the handling of denormals is not important. +// Use the method useNEONForSinglePrecisionFP() to determine if NEON should actually be used. 
def FeatureNEONForFP : SubtargetFeature<"neonfp", - "UseNEONForSinglePrecisionFP", + "HasNEONForFP", "true", "Use NEON for single precision FP">; // On some processors, VLDn instructions that access unaligned data take one // extra cycle. Take that into account when computing operand latencies. -def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", +// If true, VLDn instructions take an extra cycle for unaligned accesses. +def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment", "true", "Check for VLDn unaligned access">; // Some processors have a nonpipelined VFP coprocessor. +// If true, VFP instructions are not pipelined. def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", "NonpipelinedVFP", "true", "VFP instructions are not pipelined">; @@ -300,20 +353,27 @@ // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. +// If the VFP2 / NEON instructions are available, indicates +// whether the FP VML[AS] instructions are slow (if so, don't use them). def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", "Disable VFP / NEON MAC instructions">; -// VFPv4 added VFMA instructions that can similar be fast or slow. +// VFPv4 added VFMA instructions that can similarly be fast or slow. +// If the VFP4 / NEON instructions are available, indicates +// whether the FP VFM[AS] instructions are slow (if so, don't use them). def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true", "Disable VFP / NEON FMA instructions">; // Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +/// If true, NEON has special multiplier accumulator +/// forwarding to allow mul + mla being issued back to back. 
def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", "HasVMLxForwarding", "true", "Has multiplier accumulator forwarding">; // Disable 32-bit to 16-bit narrowing for experimentation. -def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", +// If true, codegen would prefer 32-bit Thumb instructions over 16-bit ones. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true", "Prefer 32-bit Thumb instrs">; def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", @@ -332,17 +392,24 @@ /// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is /// mapped to a separate physical register. Avoid partial CPSR update for these /// processors. +/// If true, codegen would avoid using instructions +/// that partially update CPSR and add false dependency on the previous +/// CPSR setting instruction. def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; /// Disable +1 predication cost for instructions updating CPSR. /// Enabled for Cortex-A57. +/// If true, disable +1 predication cost +/// for instructions updating CPSR. Enabled for Cortex-A57. def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", "CheapPredicableCPSRDef", "true", "Disable +1 predication cost for instructions updating CPSR">; +// If true, codegen should avoid using flag setting +// movs with shifter operand (i.e. asr, lsl, lsr). def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", "AvoidMOVsShifterOperand", "true", "Avoid movs instructions with " @@ -357,16 +424,20 @@ // Some processors have no branch predictor, which changes the expected cost of // taking a branch which affects the choice of whether to use predicated // instructions. +// True if the subtarget has a branch predictor. 
Having +// a branch predictor or not changes the expected cost of taking a branch +// which affects the choice of whether to use predicated instructions. def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", "HasBranchPredictor", "false", "Has no branch predictor">; /// DSP extension. +/// If true, the subtarget supports the DSP (saturating arith and such) instructions. def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Supports DSP instructions in " "ARM and/or Thumb2">; -// Multiprocessing extension. +// True if the subtarget supports Multiprocessing extension (ARMv7 only). def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; @@ -378,31 +449,42 @@ // Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. // See ARMInstrInfo.td for details. +// If true, NaCl TRAP instruction is generated instead of the regular TRAP. def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", "NaCl trap">; +// If true, the subtarget disallows unaligned memory +// accesses for some types. For details, see +// ARMTargetLowering::allowsMisalignedMemoryAccesses(). def FeatureStrictAlign : SubtargetFeature<"strict-align", "StrictAlign", "true", "Disallow all unaligned memory " "access">; +// Generate calls via indirect call instructions. def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", "Generate calls via indirect call " "instructions">; +// Generate code that does not contain data access to code sections. def FeatureExecuteOnly : SubtargetFeature<"execute-only", "GenExecuteOnly", "true", "Enable the generation of " "execute only code.">; +// True if R9 is not available as a general purpose register. def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", "Reserve R9, making it unavailable" " as GPR">; +// True if MOVT / MOVW pairs are not used for materialization of +// 32-bit imms (including global addresses). 
def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", "Don't use movt/movw pairs for " "32-bit imms">; +/// Implicitly convert an instruction to a different one if its immediates +/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "NegativeImmediates", "false", @@ -415,28 +497,35 @@ def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", "Use the MachineScheduler">; +// False if scheduling should happen again after register allocation. def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", "DisablePostRAScheduler", "true", "Don't schedule again after register allocation">; // Armv8.5-A extensions +// Has speculation barrier def FeatureSB : SubtargetFeature<"sb", "HasSB", "true", "Enable v8.5a Speculation Barrier" >; // Armv8.6-A extensions + +// True if subtarget supports BFloat16 floating point operations def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true", "Enable support for BFloat16 instructions", [FeatureNEON]>; +// True if subtarget supports 8-bit integer matrix multiply def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", "true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>; // Armv8.1-M extensions +// If true, the processor supports the Low Overhead Branch extension def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true", "Enable Low Overhead Branch " "extensions">; +// Mitigate against the cve-2021-35465 security vulnerability.
def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465", "FixCMSE_CVE_2021_35465", "true", "Mitigate against the cve-2021-35465 " @@ -446,6 +535,7 @@ "Enable Pointer Authentication and Branch " "Target Identification">; +/// Don't place a BTI instruction after return-twice constructs (setjmp) def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", "NoBTIAtReturnTwice", "true", "Don't place a BTI instruction " @@ -467,16 +557,18 @@ def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", "Is microcontroller profile ('M' series)">; - +// True if Thumb2 instructions are supported. def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; +// True if subtarget does not support ARM mode execution. def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; //===----------------------------------------------------------------------===// // ARM ISAa. // +// Specify whether the target supports specific ARM ISA variants. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; @@ -599,13 +691,16 @@ // Control codegen mitigation against Straight Line Speculation vulnerability. //===----------------------------------------------------------------------===// +/// Harden against Straight Line Speculation for Returns and Indirect Branches. def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", "HardenSlsRetBr", "true", "Harden against straight line speculation across RETurn and BranchRegister " "instructions">; +/// Harden against Straight Line Speculation for indirect calls. def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", "HardenSlsBlr", "true", "Harden against straight line speculation across indirect calls">; +/// Generate thunk code for SLS mitigation in the normal text section.
def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", "HardenSlsNoComdat", "true", "Generate thunk code for SLS mitigation in the normal text section">; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2337,7 +2337,7 @@ // Lower 'returns_twice' calls to a pseudo-instruction. if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && - !Subtarget->getNoBTIAtReturnTwice()) + !Subtarget->noBTIAtReturnTwice()) GuardWithBTI = AFI->branchTargetEnforcement(); // Determine whether this is a non-secure function call. diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -150,6 +150,11 @@ }; protected: +// Bool members corresponding to the SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(NAME, ATTRIBUTE, DEFAULT, GETTER) \ + bool ATTRIBUTE = DEFAULT; +#include "ARMGenSubtargetInfo.inc" + /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily = Others; @@ -159,343 +164,22 @@ /// ARMArch - ARM architecture ARMArchEnum ARMArch = ARMv4t; - /// HasV4TOps, HasV5TOps, HasV5TEOps, - /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - - /// Specify whether target support specific ARM ISA variants. 
- bool HasV4TOps = false; - bool HasV5TOps = false; - bool HasV5TEOps = false; - bool HasV6Ops = false; - bool HasV6MOps = false; - bool HasV6KOps = false; - bool HasV6T2Ops = false; - bool HasV7Ops = false; - bool HasV8Ops = false; - bool HasV8_1aOps = false; - bool HasV8_2aOps = false; - bool HasV8_3aOps = false; - bool HasV8_4aOps = false; - bool HasV8_5aOps = false; - bool HasV8_6aOps = false; - bool HasV8_8aOps = false; - bool HasV8_7aOps = false; - bool HasV9_0aOps = false; - bool HasV9_1aOps = false; - bool HasV9_2aOps = false; - bool HasV9_3aOps = false; - bool HasV8MBaselineOps = false; - bool HasV8MMainlineOps = false; - bool HasV8_1MMainlineOps = false; - bool HasMVEIntegerOps = false; - bool HasMVEFloatOps = false; - bool HasCDEOps = false; - - /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what - /// floating point ISAs are supported. - bool HasVFPv2 = false; - bool HasVFPv3 = false; - bool HasVFPv4 = false; - bool HasFPARMv8 = false; - bool HasNEON = false; - bool HasFPRegs = false; - bool HasFPRegs16 = false; - bool HasFPRegs64 = false; - - /// Versions of the VFP flags restricted to single precision, or to - /// 16 d-registers, or both. - bool HasVFPv2SP = false; - bool HasVFPv3SP = false; - bool HasVFPv4SP = false; - bool HasFPARMv8SP = false; - bool HasVFPv3D16 = false; - bool HasVFPv4D16 = false; - bool HasFPARMv8D16 = false; - bool HasVFPv3D16SP = false; - bool HasVFPv4D16SP = false; - bool HasFPARMv8D16SP = false; - - /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. - bool HasDotProd = false; - - /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been - /// specified. Use the method useNEONForSinglePrecisionFP() to - /// determine if NEON should actually be used. - bool UseNEONForSinglePrecisionFP = false; - /// UseMulOps - True if non-microcoded fused integer multiply-add and /// multiply-subtract instructions should be used. 
bool UseMulOps = false; - /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates - /// whether the FP VML[AS] instructions are slow (if so, don't use them). - bool SlowFPVMLx = false; - - /// SlowFPVFMx - If the VFP4 / NEON instructions are available, indicates - /// whether the FP VFM[AS] instructions are slow (if so, don't use them). - bool SlowFPVFMx = false; - - /// HasVMLxForwarding - If true, NEON has special multiplier accumulator - /// forwarding to allow mul + mla being issued back to back. - bool HasVMLxForwarding = false; - - /// SlowFPBrcc - True if floating point compare + branch is slow. - bool SlowFPBrcc = false; - - /// InThumbMode - True if compiling for Thumb, false for ARM. - bool InThumbMode = false; - - /// UseSoftFloat - True if we're using software floating point features. - bool UseSoftFloat = false; - - /// UseMISched - True if MachineScheduler should be used for this subtarget. - bool UseMISched = false; - - /// DisablePostRAScheduler - False if scheduling should happen again after - /// register allocation. - bool DisablePostRAScheduler = false; - - /// HasThumb2 - True if Thumb2 instructions are supported. - bool HasThumb2 = false; - - /// NoARM - True if subtarget does not support ARM mode execution. - bool NoARM = false; - - /// ReserveR9 - True if R9 is not available as a general purpose register. - bool ReserveR9 = false; - - /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of - /// 32-bit imms (including global addresses). - bool NoMovt = false; - /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. 
bool SupportsTailCall = false; - /// HasFP16 - True if subtarget supports half-precision FP conversions - bool HasFP16 = false; - - /// HasFullFP16 - True if subtarget supports half-precision FP operations - bool HasFullFP16 = false; - - /// HasFP16FML - True if subtarget supports half-precision FP fml operations - bool HasFP16FML = false; - - /// HasBF16 - True if subtarget supports BFloat16 floating point operations - bool HasBF16 = false; - - /// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply - bool HasMatMulInt8 = false; - - /// HasD32 - True if subtarget has the full 32 double precision - /// FP registers for VFPv3. - bool HasD32 = false; - - /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode - bool HasHardwareDivideInThumb = false; - - /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode - bool HasHardwareDivideInARM = false; - - /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier - /// instructions. - bool HasDataBarrier = false; - - /// HasFullDataBarrier - True if the subtarget supports DFB data barrier - /// instruction. - bool HasFullDataBarrier = false; - - /// HasV7Clrex - True if the subtarget supports CLREX instructions - bool HasV7Clrex = false; - - /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc) - /// instructions - bool HasAcquireRelease = false; - - /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions - /// over 16-bit ones. - bool Pref32BitThumb = false; - - /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions - /// that partially update CPSR and add false dependency on the previous - /// CPSR setting instruction. - bool AvoidCPSRPartialUpdate = false; - - /// CheapPredicableCPSRDef - If true, disable +1 predication cost - /// for instructions updating CPSR. Enabled for Cortex-A57. 
- bool CheapPredicableCPSRDef = false; - - /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting - /// movs with shifter operand (i.e. asr, lsl, lsr). - bool AvoidMOVsShifterOperand = false; - - /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should - /// avoid issue "normal" call instructions to callees which do not return. - bool HasRetAddrStack = false; - - /// HasBranchPredictor - True if the subtarget has a branch predictor. Having - /// a branch predictor or not changes the expected cost of taking a branch - /// which affects the choice of whether to use predicated instructions. - bool HasBranchPredictor = true; - - /// HasMPExtension - True if the subtarget supports Multiprocessing - /// extension (ARMv7 only). - bool HasMPExtension = false; - - /// HasVirtualization - True if the subtarget supports the Virtualization - /// extension. - bool HasVirtualization = false; - - /// HasFP64 - If true, the floating point unit supports double - /// precision. - bool HasFP64 = false; - - /// If true, the processor supports the Performance Monitor Extensions. These - /// include a generic cycle-counter as well as more fine-grained (often - /// implementation-specific) events. 
- bool HasPerfMon = false; - - /// HasTrustZone - if true, processor supports TrustZone security extensions - bool HasTrustZone = false; - - /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions - bool Has8MSecExt = false; - - /// HasSHA2 - if true, processor supports SHA1 and SHA256 - bool HasSHA2 = false; - - /// HasAES - if true, processor supports AES - bool HasAES = false; - - /// HasCrypto - if true, processor supports Cryptography extensions - bool HasCrypto = false; - - /// HasCRC - if true, processor supports CRC instructions - bool HasCRC = false; - - /// HasRAS - if true, the processor supports RAS extensions - bool HasRAS = false; - - /// HasLOB - if true, the processor supports the Low Overhead Branch extension - bool HasLOB = false; - - bool HasPACBTI = false; - - /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are - /// particularly effective at zeroing a VFP register. - bool HasZeroCycleZeroing = false; - - /// HasFPAO - if true, processor does positive address offset computation faster - bool HasFPAO = false; - - /// HasFuseAES - if true, processor executes back to back AES instruction - /// pairs faster. - bool HasFuseAES = false; - - /// HasFuseLiterals - if true, processor executes back to back - /// bottom and top halves of literal generation faster. - bool HasFuseLiterals = false; - - /// If true, if conversion may decide to leave some instructions unpredicated. - bool IsProfitableToUnpredicate = false; - - /// If true, VMOV will be favored over VGETLNi32. - bool HasSlowVGETLNi32 = false; - - /// If true, VMOV will be favored over VDUP. - bool HasSlowVDUP32 = false; - - /// If true, VMOVSR will be favored over VMOVDRR. - bool PreferVMOVSR = false; - - /// If true, ISHST barriers will be used for Release semantics. - bool PreferISHST = false; - - /// If true, a VLDM/VSTM starting with an odd register number is considered to - /// take more microops than single VLDRS/VSTRS. 
- bool SlowOddRegister = false; - - /// If true, loading into a D subregister will be penalized. - bool SlowLoadDSubregister = false; - - /// If true, use a wider stride when allocating VFP registers. - bool UseWideStrideVFP = false; - - /// If true, the AGU and NEON/FPU units are multiplexed. - bool HasMuxedUnits = false; - - /// If true, VMOVS will never be widened to VMOVD. - bool DontWidenVMOVS = false; - - /// If true, splat a register between VFP and NEON instructions. - bool SplatVFPToNeon = false; - - /// If true, run the MLx expansion pass. - bool ExpandMLx = false; - - /// If true, VFP/NEON VMLA/VMLS have special RAW hazards. - bool HasVMLxHazards = false; - - // If true, read thread pointer from coprocessor register. - bool ReadTPHard = false; - - /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. - bool UseNEONForFPMovs = false; - - /// If true, VLDn instructions take an extra cycle for unaligned accesses. - bool CheckVLDnAlign = false; - - /// If true, VFP instructions are not pipelined. - bool NonpipelinedVFP = false; - - /// StrictAlign - If true, the subtarget disallows unaligned memory - /// accesses for some types. For details, see - /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). - bool StrictAlign = false; - /// RestrictIT - If true, the subtarget disallows generation of complex IT /// blocks. bool RestrictIT = false; - /// HasDSP - If true, the subtarget supports the DSP (saturating arith - /// and such) instructions. - bool HasDSP = false; - - /// NaCl TRAP instruction is generated instead of the regular TRAP. - bool UseNaClTrap = false; - - /// Generate calls via indirect call instructions. - bool GenLongCalls = false; - - /// Generate code that does not contain data access to code sections. - bool GenExecuteOnly = false; - - /// Target machine allowed unsafe FP math (such as use of NEON fp) - bool UnsafeFPMath = false; - /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). 
bool UseSjLjEH = false; - /// Has speculation barrier - bool HasSB = false; - - /// Implicitly convert an instruction to a different one if its immediates - /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. - bool NegativeImmediates = true; - - /// Mitigate against the cve-2021-35465 security vulnurability. - bool FixCMSE_CVE_2021_35465 = false; - - /// Harden against Straight Line Speculation for Returns and Indirect - /// Branches. - bool HardenSlsRetBr = false; - - /// Harden against Straight Line Speculation for indirect calls. - bool HardenSlsBlr = false; - - /// Generate thunk code for SLS mitigation in the normal text section. - bool HardenSlsNoComdat = false; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. Align stackAlignment = Align(4); @@ -540,10 +224,6 @@ /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; - /// NoBTIAtReturnTwice - Don't place a BTI instruction after - /// return-twice constructs (setjmp) - bool NoBTIAtReturnTwice = false; - /// Options passed via command line that could influence the target const TargetOptions &Options; @@ -622,38 +302,12 @@ std::bitset<8> CoprocCDE = {}; public: - void computeIssueWidth(); +// Getters for SubtargetFeatures defined in tablegen +#define GET_SUBTARGETINFO_MACRO(NAME, ATTRIBUTE, DEFAULT, GETTER) \ + bool GETTER() const { return ATTRIBUTE; } +#include "ARMGenSubtargetInfo.inc" - bool hasV4TOps() const { return HasV4TOps; } - bool hasV5TOps() const { return HasV5TOps; } - bool hasV5TEOps() const { return HasV5TEOps; } - bool hasV6Ops() const { return HasV6Ops; } - bool hasV6MOps() const { return HasV6MOps; } - bool hasV6KOps() const { return HasV6KOps; } - bool hasV6T2Ops() const { return HasV6T2Ops; } - bool hasV7Ops() const { return HasV7Ops; } - bool hasV8Ops() const { return HasV8Ops; } - bool hasV8_1aOps() const { return 
HasV8_1aOps; } - bool hasV8_2aOps() const { return HasV8_2aOps; } - bool hasV8_3aOps() const { return HasV8_3aOps; } - bool hasV8_4aOps() const { return HasV8_4aOps; } - bool hasV8_5aOps() const { return HasV8_5aOps; } - bool hasV8_6aOps() const { return HasV8_6aOps; } - bool hasV8_7aOps() const { return HasV8_7aOps; } - bool hasV8_8aOps() const { return HasV8_8aOps; } - bool hasV9_0aOps() const { return HasV9_0aOps; } - bool hasV9_1aOps() const { return HasV9_1aOps; } - bool hasV9_2aOps() const { return HasV9_2aOps; } - bool hasV9_3aOps() const { return HasV9_3aOps; } - bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } - bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } - bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; } - bool hasMVEIntegerOps() const { return HasMVEIntegerOps; } - bool hasMVEFloatOps() const { return HasMVEFloatOps; } - bool hasCDEOps() const { return HasCDEOps; } - bool hasFPRegs() const { return HasFPRegs; } - bool hasFPRegs16() const { return HasFPRegs16; } - bool hasFPRegs64() const { return HasFPRegs64; } + void computeIssueWidth(); /// @{ /// These functions are obsolete, please consider adding subtarget features @@ -673,31 +327,14 @@ bool hasARMOps() const { return !NoARM; } - bool hasVFP2Base() const { return HasVFPv2SP; } - bool hasVFP3Base() const { return HasVFPv3D16SP; } - bool hasVFP4Base() const { return HasVFPv4D16SP; } - bool hasFPARMv8Base() const { return HasFPARMv8D16SP; } - bool hasNEON() const { return HasNEON; } - bool hasSHA2() const { return HasSHA2; } - bool hasAES() const { return HasAES; } - bool hasCrypto() const { return HasCrypto; } - bool hasDotProd() const { return HasDotProd; } - bool hasCRC() const { return HasCRC; } - bool hasRAS() const { return HasRAS; } - bool hasLOB() const { return HasLOB; } - bool hasPACBTI() const { return HasPACBTI; } - bool hasVirtualization() const { return HasVirtualization; } - bool useNEONForSinglePrecisionFP() const { - return hasNEON() && 
UseNEONForSinglePrecisionFP; + return hasNEON() && hasNEONForFP(); } - bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } - bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } - bool hasDataBarrier() const { return HasDataBarrier; } - bool hasFullDataBarrier() const { return HasFullDataBarrier; } - bool hasV7Clrex() const { return HasV7Clrex; } - bool hasAcquireRelease() const { return HasAcquireRelease; } + bool hasVFP2Base() const { return hasVFPv2SP(); } + bool hasVFP3Base() const { return hasVFPv3D16SP(); } + bool hasVFP4Base() const { return hasVFPv4D16SP(); } + bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); } bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); @@ -710,43 +347,7 @@ } bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); } bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); } - bool hasVMLxForwarding() const { return HasVMLxForwarding; } - bool isFPBrccSlow() const { return SlowFPBrcc; } - bool hasFP64() const { return HasFP64; } - bool hasPerfMon() const { return HasPerfMon; } - bool hasTrustZone() const { return HasTrustZone; } - bool has8MSecExt() const { return Has8MSecExt; } - bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } - bool hasFPAO() const { return HasFPAO; } - bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } - bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } - bool hasSlowVDUP32() const { return HasSlowVDUP32; } - bool preferVMOVSR() const { return PreferVMOVSR; } - bool preferISHSTBarriers() const { return PreferISHST; } - bool expandMLx() const { return ExpandMLx; } - bool hasVMLxHazards() const { return HasVMLxHazards; } - bool hasSlowOddRegister() const { return SlowOddRegister; } - bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; } - bool useWideStrideVFP() const { return UseWideStrideVFP; } - bool hasMuxedUnits() const { return HasMuxedUnits; } - bool 
dontWidenVMOVS() const { return DontWidenVMOVS; } - bool useSplatVFPToNeon() const { return SplatVFPToNeon; } - bool useNEONForFPMovs() const { return UseNEONForFPMovs; } - bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; } - bool nonpipelinedVFP() const { return NonpipelinedVFP; } - bool prefers32BitThumb() const { return Pref32BitThumb; } - bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } - bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } - bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } - bool hasRetAddrStack() const { return HasRetAddrStack; } - bool hasBranchPredictor() const { return HasBranchPredictor; } - bool hasMPExtension() const { return HasMPExtension; } - bool hasDSP() const { return HasDSP; } - bool useNaClTrap() const { return UseNaClTrap; } bool useSjLjEH() const { return UseSjLjEH; } - bool hasSB() const { return HasSB; } - bool genLongCalls() const { return GenLongCalls; } - bool genExecuteOnly() const { return GenExecuteOnly; } bool hasBaseDSP() const { if (isThumb()) return hasDSP(); @@ -754,19 +355,9 @@ return hasV5TEOps(); } - bool hasFP16() const { return HasFP16; } - bool hasD32() const { return HasD32; } - bool hasFullFP16() const { return HasFullFP16; } - bool hasFP16FML() const { return HasFP16FML; } - bool hasBF16() const { return HasBF16; } - - bool hasFuseAES() const { return HasFuseAES; } - bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. 
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); } - bool hasMatMulInt8() const { return HasMatMulInt8; } - const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } @@ -826,17 +417,12 @@ bool isRWPI() const; bool useMachineScheduler() const { return UseMISched; } - bool disablePostRAScheduler() const { return DisablePostRAScheduler; } - bool useSoftFloat() const { return UseSoftFloat; } - bool isThumb() const { return InThumbMode; } bool hasMinSize() const { return OptMinSize; } - bool isThumb1Only() const { return InThumbMode && !HasThumb2; } - bool isThumb2() const { return InThumbMode && HasThumb2; } - bool hasThumb2() const { return HasThumb2; } + bool isThumb1Only() const { return isThumb() && !hasThumb2(); } + bool isThumb2() const { return isThumb() && hasThumb2(); } bool isMClass() const { return ARMProcClass == MClass; } bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } - bool isReadTPHard() const { return ReadTPHard; } bool isR9Reserved() const { return isTargetMachO() ? 
(ReserveR9 || !HasV6Ops) : ReserveR9; @@ -957,14 +543,6 @@ bool ignoreCSRForAllocationOrder(const MachineFunction &MF, unsigned PhysReg) const override; unsigned getGPRAllocationOrder(const MachineFunction &MF) const; - - bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; } - - bool hardenSlsRetBr() const { return HardenSlsRetBr; } - bool hardenSlsBlr() const { return HardenSlsBlr; } - bool hardenSlsNoComdat() const { return HardenSlsNoComdat; } - - bool getNoBTIAtReturnTwice() const { return NoBTIAtReturnTwice; } }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -245,7 +245,7 @@ const FeatureBitset &Bits = getFeatureBits(); if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters (Options.UnsafeFPMath || isTargetDarwin())) - UseNEONForSinglePrecisionFP = true; + HasNEONForFP = true; if (isRWPI()) ReserveR9 = true; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1803,6 +1803,34 @@ OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n"; + OS << "\n#ifdef GET_SUBTARGETINFO_MACRO\n"; + std::vector FeatureList = + Records.getAllDerivedDefinitions("SubtargetFeature"); + llvm::sort(FeatureList, LessRecordFieldName()); + for (const Record *Feature : FeatureList) { + + const auto Name = Feature->getValueAsString("Name"); + const auto Attribute = Feature->getValueAsString("Attribute"); + const auto Value = Feature->getValueAsString("Value"); + + // Only handle boolean features for now, excluding BitVectors and enums. + const bool IsBool = (Value == "false" || Value == "true") && + !StringRef(Attribute).contains('['); + if (!IsBool) + continue; + + // Some features default to true, with values set to false if enabled. 
+ const auto Default = Value == "false" ? "true" : "false"; + + // Define the getter with lowercased first char: xxxYyy() { return xxxYyy; } + const auto Getter = Attribute.substr(0, 1).lower() + Attribute.substr(1); + + OS << "GET_SUBTARGETINFO_MACRO(" << Name << ", " << Attribute << ", " + << Default << ", " << Getter << ")\n"; + } + OS << "#undef GET_SUBTARGETINFO_MACRO\n"; + OS << "#endif // GET_SUBTARGETINFO_MACRO\n\n"; + OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n"; OS << "#undef GET_SUBTARGETINFO_MC_DESC\n\n";