diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -27,7 +27,7 @@
                                   "16-bit mode (i8086)">;
 
 //===----------------------------------------------------------------------===//
-// X86 Subtarget features
+// X86 Subtarget ISA features
 //===----------------------------------------------------------------------===//
 
 def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
@@ -100,20 +100,6 @@
 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                                          "64-bit with cmpxchg16b",
                                          [FeatureCMPXCHG8B]>;
-def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
-                                       "SHLD instruction is slow">;
-def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
-                                         "PMULLD instruction is slow">;
-def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
-                                          "true",
-                                          "PMADDWD is slower than PMULLD">;
-// FIXME: This should not apply to CPUs that do not have SSE.
-def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
-                                          "IsUAMem16Slow", "true",
-                                          "Slow unaligned 16-byte memory access">;
-def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
-                                          "IsUAMem32Slow", "true",
-                                          "Slow unaligned 32-byte memory access">;
 def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                     "Support SSE 4a instructions",
                                     [FeatureSSE3]>;
@@ -255,17 +241,6 @@
 def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                                       "Support AMX-BF16 instructions",
                                       [FeatureAMXTILE]>;
-def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
-                                       "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
-                                           "HasSlowDivide32", "true",
-                                           "Use 8-bit divide for positive values less than 256">;
-def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
-                                           "HasSlowDivide64", "true",
-                                           "Use 32-bit divide for positive values less than 2^32">;
-def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
-                                                "PadShortFunctions", "true",
-                                                "Pad short functions">;
 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                                       "Invalidate Process-Context Identifier">;
 def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
@@ -296,31 +271,163 @@
                                        "Support TSXLDTRK instructions">;
 def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
                                     "Has UINTR Instructions">;
+def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
+                                      "platform configuration instruction">;
+def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
+                                      "Support movdiri instruction">;
+def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
+                                        "Support movdir64b instruction">;
+
+// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
+// "string operations"). See "REP String Enhancement" in the Intel Software
+// Development Manual. This feature essentially means that REP MOVSB will copy
+// using the largest available size instead of copying bytes one by one, making
+// it at least as fast as REPMOVS{W,D,Q}.
+def FeatureERMSB
+    : SubtargetFeature<
+          "ermsb", "HasERMSB", "true",
+          "REP MOVS/STOS are fast">;
+
+// Icelake and newer processors have Fast Short REP MOV.
+def FeatureFSRM
+    : SubtargetFeature<
+          "fsrm", "HasFSRM", "true",
+          "REP MOVSB of short lengths is faster">;
+
+def FeatureSoftFloat
+    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+                       "Use software floating point features">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget Security Mitigation features
+//===----------------------------------------------------------------------===//
+
+// Lower indirect calls using a special construct called a `retpoline` to
+// mitigate potential Spectre v2 attacks against them.
+def FeatureRetpolineIndirectCalls
+    : SubtargetFeature<
+          "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+          "Remove speculation of indirect calls from the generated code">;
+
+// Lower indirect branches and switches either using conditional branch trees
+// or using a special construct called a `retpoline` to mitigate potential
+// Spectre v2 attacks against them.
+def FeatureRetpolineIndirectBranches
+    : SubtargetFeature<
+          "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+          "Remove speculation of indirect branches from the generated code">;
+
+// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+// `retpoline-indirect-branches` above.
+def FeatureRetpoline
+    : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
+                       "Remove speculation of indirect branches from the "
+                       "generated code, either by avoiding them entirely or "
+                       "lowering them with a speculation blocking construct",
+                       [FeatureRetpolineIndirectCalls,
+                        FeatureRetpolineIndirectBranches]>;
+
+// Rely on external thunks for the emitted retpoline calls. This allows users
+// to provide their own custom thunk definitions in highly specialized
+// environments such as a kernel that does boot-time hot patching.
+def FeatureRetpolineExternalThunk
+    : SubtargetFeature<
+          "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
+          "When lowering an indirect call or branch using a `retpoline`, rely "
+          "on the specified user provided thunk rather than emitting one "
+          "ourselves. Only has effect when combined with some other retpoline "
+          "feature", [FeatureRetpolineIndirectCalls]>;
+
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+    : SubtargetFeature<
+          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+          "Prevent indirect calls/branches from using a memory operand, and "
+          "precede all indirect calls/branches from a register with an "
+          "LFENCE instruction to serialize control flow. Also decompose RET "
+          "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Enable SESES to mitigate speculative execution attacks
+def FeatureSpeculativeExecutionSideEffectSuppression
+    : SubtargetFeature<
+          "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
+          "Prevent speculative execution side channel timing attacks by "
+          "inserting a speculation barrier before memory reads, memory writes, "
+          "and conditional branches. Implies LVI Control Flow integrity.",
+          [FeatureLVIControlFlowIntegrity]>;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+    : SubtargetFeature<
+          "lvi-load-hardening", "UseLVILoadHardening", "true",
+          "Insert LFENCE instructions to prevent data speculatively injected "
+          "into loads from being used maliciously.">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget Tuning features
+//===----------------------------------------------------------------------===//
+
+def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
+                                       "SHLD instruction is slow">;
+
+def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
+                                         "PMULLD instruction is slow">;
+
+def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+                                          "true",
+                                          "PMADDWD is slower than PMULLD">;
+
+// FIXME: This should not apply to CPUs that do not have SSE.
+def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+                                          "IsUAMem16Slow", "true",
+                                          "Slow unaligned 16-byte memory access">;
+
+def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
+                                          "IsUAMem32Slow", "true",
+                                          "Slow unaligned 32-byte memory access">;
+
+def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+                                       "Use LEA for adjusting the stack pointer">;
+
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                           "HasSlowDivide32", "true",
+                                           "Use 8-bit divide for positive values less than 256">;
+
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
+                                           "HasSlowDivide64", "true",
+                                           "Use 32-bit divide for positive values less than 2^32">;
+
+def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+                                                "PadShortFunctions", "true",
+                                                "Pad short functions">;
+
 // On some processors, instructions that implicitly take two memory operands are
 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
 def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
                                             "SlowTwoMemOps", "true",
                                             "Two memory operand instructions are slow">;
+
 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
                                         "LEA instruction needs inputs at AG stage">;
+
 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                                       "LEA instruction with certain arguments is slow">;
+
 def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                                           "LEA instruction with 3 ops or certain registers is slow">;
+
 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                          "INC and DEC instructions are slower than ADD and SUB">;
-def FeatureSoftFloat
-    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
-                       "Use software floating point features">;
+
 def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
                                               "HasPOPCNTFalseDeps", "true",
                                               "POPCNT has a false dependency on dest register">;
+
 def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
                                              "HasLZCNTFalseDeps", "true",
                                              "LZCNT/TZCNT have a false dependency on dest register">;
-def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
-                                      "platform configuration instruction">;
+
 // On recent X86 (port bound) processors, its preferable to combine to a single shuffle
 // using a variable mask over multiple fixed shuffles.
 def FeatureFastVariableCrossLaneShuffle
@@ -338,6 +445,7 @@
     : SubtargetFeature<"vzeroupper", "InsertVZEROUPPER", "true",
                        "Should insert vzeroupper instructions">;
+
 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
 // vector FSQRT has higher throughput than the corresponding NR code.
@@ -351,27 +459,32 @@
 def FeatureFastVectorFSQRT : SubtargetFeature<"fast-vector-fsqrt",
                                               "HasFastVectorFSQRT", "true",
                                               "Vector SQRT is fast (disable Newton-Raphson)">;
+
 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
 // be used to replace test/set sequences.
 def FeatureFastLZCNT
     : SubtargetFeature<
          "fast-lzcnt", "HasFastLZCNT", "true",
          "LZCNT instructions are as fast as most simple integer ops">;
+
 // If the target can efficiently decode NOPs upto 7-bytes in length.
 def FeatureFast7ByteNOP
     : SubtargetFeature<
          "fast-7bytenop", "HasFast7ByteNOP", "true",
          "Target can quickly decode up to 7 byte NOPs">;
+
 // If the target can efficiently decode NOPs upto 11-bytes in length.
 def FeatureFast11ByteNOP
     : SubtargetFeature<
          "fast-11bytenop", "HasFast11ByteNOP", "true",
          "Target can quickly decode up to 11 byte NOPs">;
+
 // If the target can efficiently decode NOPs upto 15-bytes in length.
 def FeatureFast15ByteNOP
    : SubtargetFeature<
         "fast-15bytenop", "HasFast15ByteNOP", "true",
         "Target can quickly decode up to 15 byte NOPs">;
+
 // Sandy Bridge and newer processors can use SHLD with the same source on both
 // inputs to implement rotate to avoid the partial flag update of the normal
 // rotate instructions.
@@ -380,22 +493,6 @@
     "fast-shld-rotate", "HasFastSHLDRotate", "true",
     "SHLD can be used as a faster rotate">;
-// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
-// "string operations"). See "REP String Enhancement" in the Intel Software
-// Development Manual. This feature essentially means that REP MOVSB will copy
-// using the largest available size instead of copying bytes one by one, making
-// it at least as fast as REPMOVS{W,D,Q}.
-def FeatureERMSB
-    : SubtargetFeature<
-          "ermsb", "HasERMSB", "true",
-          "REP MOVS/STOS are fast">;
-
-// Icelake and newer processors have Fast Short REP MOV.
-def FeatureFSRM
-    : SubtargetFeature<
-          "fsrm", "HasFSRM", "true",
-          "REP MOVSB of short lengths is faster">;
-
 // Bulldozer and newer processors can merge CMP/TEST (but not other
 // instructions) with conditional branches.
 def FeatureBranchFusion
@@ -429,73 +526,6 @@
     : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
                        "Prefer AVX512 mask registers over PTEST/MOVMSK">;
-// Lower indirect calls using a special construct called a `retpoline` to
-// mitigate potential Spectre v2 attacks against them.
-def FeatureRetpolineIndirectCalls
-    : SubtargetFeature<
-          "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
-          "Remove speculation of indirect calls from the generated code">;
-
-// Lower indirect branches and switches either using conditional branch trees
-// or using a special construct called a `retpoline` to mitigate potential
-// Spectre v2 attacks against them.
-def FeatureRetpolineIndirectBranches
-    : SubtargetFeature<
-          "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
-          "Remove speculation of indirect branches from the generated code">;
-
-// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
-// `retpoline-indirect-branches` above.
-def FeatureRetpoline
-    : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
-                       "Remove speculation of indirect branches from the "
-                       "generated code, either by avoiding them entirely or "
-                       "lowering them with a speculation blocking construct",
-                       [FeatureRetpolineIndirectCalls,
-                        FeatureRetpolineIndirectBranches]>;
-
-// Rely on external thunks for the emitted retpoline calls. This allows users
-// to provide their own custom thunk definitions in highly specialized
-// environments such as a kernel that does boot-time hot patching.
-def FeatureRetpolineExternalThunk
-    : SubtargetFeature<
-          "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
-          "When lowering an indirect call or branch using a `retpoline`, rely "
-          "on the specified user provided thunk rather than emitting one "
-          "ourselves. Only has effect when combined with some other retpoline "
-          "feature", [FeatureRetpolineIndirectCalls]>;
-
-// Mitigate LVI attacks against indirect calls/branches and call returns
-def FeatureLVIControlFlowIntegrity
-    : SubtargetFeature<
-          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
-          "Prevent indirect calls/branches from using a memory operand, and "
-          "precede all indirect calls/branches from a register with an "
-          "LFENCE instruction to serialize control flow. Also decompose RET "
-          "instructions into a POP+LFENCE+JMP sequence.">;
-
-// Enable SESES to mitigate speculative execution attacks
-def FeatureSpeculativeExecutionSideEffectSuppression
-    : SubtargetFeature<
-          "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
-          "Prevent speculative execution side channel timing attacks by "
-          "inserting a speculation barrier before memory reads, memory writes, "
-          "and conditional branches. Implies LVI Control Flow integrity.",
-          [FeatureLVIControlFlowIntegrity]>;
-
-// Mitigate LVI attacks against data loads
-def FeatureLVILoadHardening
-    : SubtargetFeature<
-          "lvi-load-hardening", "UseLVILoadHardening", "true",
-          "Insert LFENCE instructions to prevent data speculatively injected "
-          "into loads from being used maliciously.">;
-
-// Direct Move instructions.
-def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
-                                      "Support movdiri instruction">;
-def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
-                                        "Support movdir64b instruction">;
-
 def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
          "Indicates that the BEXTR instruction is implemented as a single uop "
          "with good throughput">;