// Patch summary: adds X86 CLDEMOTE support across 11 files.
//   - IntrinsicsX86.td: declares int_x86_cldemote (GCC builtin
//     __builtin_ia32_cldemote), taking one pointer and returning nothing.
//   - Host.cpp: sets Features["cldemote"] from leaf-7 ECX bit 25, gated on HasLeaf7.
//   - X86.td / X86InstrInfo.td: adds SubtargetFeature "cldemote" (HasCLDEMOTE), the
//     HasCLDEMOTE predicate, and the CLDEMOTE instruction (opcode 0x1C, MRM0m, TB map,
//     i8mem operand) selected from the intrinsic and guarded by Requires<[HasCLDEMOTE]>.
//   - X86Subtarget.h/.cpp: adds the HasCLDEMOTE flag, its hasCLDEMOTE() accessor, and
//     resets the flag to false alongside the neighbouring feature flags.
//   - Tests: new test/CodeGen/X86/cldemote-intrinsic.ll (x86_64 and i386 RUN lines),
//     plus assembler and disassembler checks pinning the bytes 0x0f 0x1c <modrm...>.
// NOTE(review): the line breaks of this patch appear to have been collapsed into
// spaces; the "@@ -a,b +c,d @@" counts no longer match the physical lines, so this
// text presumably will not apply as-is -- regenerate it from the original revision.
Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -6421,3 +6421,10 @@ def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">, Intrinsic<[], [llvm_ptr_ty], []>; } + +//===----------------------------------------------------------------------===// +// Cache-line demote +let TargetPrefix = "x86" in { + def int_x86_cldemote : GCCBuiltin<"__builtin_ia32_cldemote">, + Intrinsic<[], [llvm_ptr_ty], []>; +} Index: lib/Support/Host.cpp =================================================================== --- lib/Support/Host.cpp +++ lib/Support/Host.cpp @@ -1254,6 +1254,7 @@ Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); + Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); Features["ibt"] = HasLeaf7 && ((EDX >> 20) & 1); Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -230,6 +230,8 @@ "Enable MONITORX/MWAITX timer functionality">; def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true", "Enable Cache Line Zero">; +def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true", + "Enable Cache Demote">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -881,6 +881,7 @@ def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">; def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">; +def HasCLDEMOTE : 
Predicate<"Subtarget->hasCLDEMOTE()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasMPX : Predicate<"Subtarget->hasMPX()">; @@ -2758,6 +2759,9 @@ def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", [(int_x86_clwb addr:$src)], IIC_SSE_PREFETCH>, PD; +def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src", + [(int_x86_cldemote addr:$src)]>, TB, Requires<[HasCLDEMOTE]>; + //===----------------------------------------------------------------------===// // Subsystems. //===----------------------------------------------------------------------===// Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -204,6 +204,9 @@ /// Processor has Cache Line Zero instruction bool HasCLZERO; + /// Processor has Cache Line Demote instruction + bool HasCLDEMOTE; + /// Processor has Prefetch with intent to Write instruction bool HasPREFETCHWT1; @@ -573,6 +576,7 @@ bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool hasMWAITX() const { return HasMWAITX; } bool hasCLZERO() const { return HasCLZERO; } + bool hasCLDEMOTE() const { return HasCLDEMOTE; } bool isSHLDSlow() const { return IsSHLDSlow; } bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -318,6 +318,7 @@ HasLAHFSAHF = false; HasMWAITX = false; HasCLZERO = false; + HasCLDEMOTE = false; HasMPX = false; HasSHSTK = false; HasIBT = false; Index: test/CodeGen/X86/cldemote-intrinsic.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/cldemote-intrinsic.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+cldemote | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+cldemote | FileCheck %s --check-prefix=X32 + +define void @test_cldemote(i8* %p) { +; X64-LABEL: test_cldemote: +; X64: # %bb.0: # %entry +; X64-NEXT: cldemote (%rdi) +; X64-NEXT: retq +; +; X32-LABEL: test_cldemote: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: cldemote (%eax) +; X32-NEXT: retl +entry: + tail call void @llvm.x86.cldemote(i8* %p) + ret void +} + +declare void @llvm.x86.cldemote(i8*) Index: test/MC/Disassembler/X86/x86-32.txt =================================================================== --- test/MC/Disassembler/X86/x86-32.txt +++ test/MC/Disassembler/X86/x86-32.txt @@ -820,3 +820,9 @@ # CHECK: ptwritel %eax 0xf3 0x0f 0xae 0xe0 + +# CHECK: cldemote 4(%eax) +0x0f 0x1c 0x40 0x04 + +# CHECK: cldemote -559038737(%ebx,%ecx,8) +0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde Index: test/MC/Disassembler/X86/x86-64.txt =================================================================== --- test/MC/Disassembler/X86/x86-64.txt +++ test/MC/Disassembler/X86/x86-64.txt @@ -516,3 +516,9 @@ # CHECK: ptwriteq %rax 0xf3 0x48 0x0f 0xae 0xe0 + +# CHECK: cldemote 4(%rax) +0x0f 0x1c 0x40 0x04 + +# CHECK: cldemote -559038737(%rbx,%rcx,8) +0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde Index: test/MC/X86/x86-32-coverage.s =================================================================== --- test/MC/X86/x86-32-coverage.s +++ test/MC/X86/x86-32-coverage.s @@ -10741,3 +10741,10 @@ // CHECK: encoding: [0xf0,0x01,0x37] lock add %esi, (%edi) +// CHECK: cldemote 4(%eax) +// CHECK: encoding: [0x0f,0x1c,0x40,0x04] + cldemote 4(%eax) + +// CHECK: cldemote 3735928559(%ebx,%ecx,8) +// CHECK: encoding: [0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde] + cldemote 0xdeadbeef(%ebx,%ecx,8) Index: test/MC/X86/x86-64.s =================================================================== --- 
test/MC/X86/x86-64.s +++ test/MC/X86/x86-64.s @@ -1559,6 +1559,14 @@ // CHECK: encoding: [0xf3,0x48,0x0f,0xae,0xe0] ptwriteq %rax +// CHECK: cldemote 4(%rax) +// CHECK: encoding: [0x0f,0x1c,0x40,0x04] +cldemote 4(%rax) + +// CHECK: cldemote 3735928559(%rbx,%rcx,8) +// CHECK: encoding: [0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde] +cldemote 0xdeadbeef(%rbx,%rcx,8) + // __asm __volatile( // "pushf \n\t" // "popf \n\t"