Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -6421,3 +6421,20 @@
   def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">,
       Intrinsic<[], [llvm_ptr_ty], []>;
 }
+
+//===----------------------------------------------------------------------===//
+// Wait and pause enhancements
+// umwait/tpause operands are: control, then the high and low 32 bits of the
+// TSC deadline, which the instructions read from EDX:EAX.
+let TargetPrefix = "x86" in {
+  def int_x86_umonitor : GCCBuiltin<"__builtin_ia32_umonitor">,
+              Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_x86_umwait32 :
+              Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_umwait64 :
+              Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_tpause32 :
+              Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_tpause64 :
+              Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>;
+}
Index: lib/Support/Host.cpp
===================================================================
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -1245,6 +1245,7 @@
   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >> 0) & 1);
   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
   Features["pku"]             = HasLeaf7 && ((ECX >> 4) & 1);
+  Features["waitpkg"]         = HasLeaf7 && ((ECX >> 5) & 1);
   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
   Features["shstk"]           = HasLeaf7 && ((ECX >> 7) & 1);
   Features["gfni"]            = HasLeaf7 && ((ECX >> 8) & 1);
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -251,6 +251,8 @@
                                        "Cache Line Write Back">;
 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                     "Support RDPID instructions">;
+def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
+                                      "Wait and pause enhancements">;
 // On some processors, instructions that implicitly take two memory operands are
 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -889,6 +889,7 @@
 def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
 def HasCLWB      : Predicate<"Subtarget->hasCLWB()">;
 def HasRDPID     : Predicate<"Subtarget->hasRDPID()">;
+def HasWAITPKG   : Predicate<"Subtarget->hasWAITPKG()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
 def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
@@ -2670,6 +2671,42 @@
 def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
       Requires<[ In64BitMode ]>;
 
+//===----------------------------------------------------------------------===//
+// WAITPKG Instructions
+//
+let SchedRW = [WriteSystem] in {
+  def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src),
+                     "umonitor\t$src", [(int_x86_umonitor GR16:$src)]>,
+                     XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>;
+  def UMONITOR32 : I<0xAE, MRM6r, (outs), (ins GR32:$src),
+                     "umonitor\t$src", [(int_x86_umonitor GR32:$src)]>,
+                     XS, AdSize32, Requires<[HasWAITPKG]>;
+  def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src),
+                     "umonitor\t$src", [(int_x86_umonitor GR64:$src)]>,
+                     XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>;
+
+  // UMWAIT/TPAUSE read the TSC deadline from EDX:EAX (EDX holds the high half)
+  // and overwrite the arithmetic flags, so EFLAGS must be listed as clobbered.
+  let Uses = [EAX, EDX], Defs = [EFLAGS] in {
+    def UMWAIT32 : I<0xAE, MRM6r,
+                     (outs), (ins GR32:$src),
+                     "umwait\t$src", [(int_x86_umwait32 GR32:$src, EDX, EAX)]>,
+                     XD, Requires<[HasWAITPKG, Not64BitMode]>;
+    def UMWAIT64 : I<0xAE, MRM6r,
+                     (outs), (ins GR64:$src),
+                     "umwait\t$src", [(int_x86_umwait64 GR64:$src, EDX, EAX)]>,
+                     XD, Requires<[HasWAITPKG, In64BitMode]>;
+    def TPAUSE32 : I<0xAE, MRM6r,
+                     (outs), (ins GR32:$src),
+                     "tpause\t$src", [(int_x86_tpause32 GR32:$src, EDX, EAX)]>,
+                     PD, Requires<[HasWAITPKG, Not64BitMode]>;
+    def TPAUSE64 : I<0xAE, MRM6r,
+                     (outs), (ins GR64:$src),
+                     "tpause\t$src", [(int_x86_tpause64 GR64:$src, EDX, EAX)]>,
+                     PD, Requires<[HasWAITPKG, In64BitMode]>;
+  }
+} // SchedRW
+
 //===----------------------------------------------------------------------===//
 // CLZERO Instruction
 //
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -3528,7 +3528,7 @@
                TB, Requires<[HasSSE2]>;
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
-               TB, Requires<[HasMFence]>;
+               PS, Requires<[HasMFence]>;
 } // SchedRW
 def : Pat<(X86MFence), (MFENCE)>;
 
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -362,6 +362,9 @@
   /// Processor support RDPID instruction
   bool HasRDPID;
 
+  /// Processor supports WaitPKG instructions
+  bool HasWAITPKG;
+
   /// Use a retpoline thunk rather than indirect calls to block speculative
   /// execution.
   bool UseRetpoline;
@@ -621,6 +624,7 @@
   bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
   bool hasCLWB() const { return HasCLWB; }
   bool hasRDPID() const { return HasRDPID; }
+  bool hasWAITPKG() const { return HasWAITPKG; }
   bool useRetpoline() const { return UseRetpoline; }
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
 
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -325,6 +325,7 @@
   HasCLFLUSHOPT = false;
   HasCLWB = false;
   HasRDPID = false;
+  HasWAITPKG = false;
   UseRetpoline = false;
   UseRetpolineExternalThunk = false;
   IsPMULLDSlow = false;
Index: test/CodeGen/X86/waitpkg-intrinsics-32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/waitpkg-intrinsics-32.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X32
+
+define void @test_umonitor(i8* %address) {
+; X32-LABEL: test_umonitor:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    umonitor %eax
+; X32-NEXT:    retl
+entry:
+  call void @llvm.x86.umonitor(i8* %address)
+  ret void
+}
+
+define void @test_umwait(i32 %control, i32 %counter_high, i32 %counter_low) {
+; X32-LABEL: test_umwait:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    umwait %ecx
+; X32-NEXT:    retl
+entry:
+  call void @llvm.x86.umwait32(i32 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+define void @test_tpause(i32 %control, i32 %counter_high, i32 %counter_low) {
+; X32-LABEL: test_tpause:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    tpause %ecx
+; X32-NEXT:    retl
+entry:
+  call void @llvm.x86.tpause32(i32 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+declare void @llvm.x86.umonitor(i8*)
+declare void @llvm.x86.umwait32(i32, i32, i32)
+declare void @llvm.x86.tpause32(i32, i32, i32)
Index: test/CodeGen/X86/waitpkg-intrinsics-64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/waitpkg-intrinsics-64.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X64
+
+define void @test_umonitor(i8* %address) {
+; X64-LABEL: test_umonitor:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    umonitor %rdi
+; X64-NEXT:    retq
+entry:
+  call void @llvm.x86.umonitor(i8* %address)
+  ret void
+}
+
+define void @test_umwait(i64 %control, i32 %counter_high, i32 %counter_low) {
+; X64-LABEL: test_umwait:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    umwait %rdi
+; X64-NEXT:    retq
+entry:
+  call void @llvm.x86.umwait64(i64 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+define void @test_tpause(i64 %control, i32 %counter_high, i32 %counter_low) {
+; X64-LABEL: test_tpause:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    tpause %rdi
+; X64-NEXT:    retq
+entry:
+  call void @llvm.x86.tpause64(i64 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+declare void @llvm.x86.umonitor(i8*)
+declare void @llvm.x86.umwait64(i64, i32, i32)
+declare void @llvm.x86.tpause64(i64, i32, i32)
Index: test/MC/Disassembler/X86/x86-16.txt
===================================================================
--- test/MC/Disassembler/X86/x86-16.txt
+++ test/MC/Disassembler/X86/x86-16.txt
@@ -791,3 +791,9 @@
 
 # CHECK: callw -1
 0xe8 0xff 0xff
+
+# CHECK: umonitor %ax
+0xf3,0x0f,0xae,0xf0
+
+# CHECK: umonitor %eax
+0x67, 0xf3,0x0f,0xae,0xf0
Index: test/MC/Disassembler/X86/x86-32.txt
===================================================================
--- test/MC/Disassembler/X86/x86-32.txt
+++ test/MC/Disassembler/X86/x86-32.txt
@@ -820,3 +820,15 @@
 
 # CHECK: ptwritel %eax
 0xf3 0x0f 0xae 0xe0
+
+# CHECK: umonitor %eax
+0xf3,0x0f,0xae,0xf0
+
+# CHECK: umonitor %ax
+0x67, 0xf3,0x0f,0xae,0xf0
+
+# CHECK: umwait %eax
+0xf2,0x0f,0xae,0xf0
+
+# CHECK: tpause %eax
+0x66,0x0f,0xae,0xf0
Index: test/MC/Disassembler/X86/x86-64.txt
===================================================================
--- test/MC/Disassembler/X86/x86-64.txt
+++ test/MC/Disassembler/X86/x86-64.txt
@@ -516,3 +516,18 @@
 
 # CHECK: ptwriteq %rax
 0xf3 0x48 0x0f 0xae 0xe0
+
+# CHECK: umonitor %rax
+0xf3,0x0f,0xae,0xf0
+
+# CHECK: umonitor %eax
+0x67, 0xf3,0x0f,0xae,0xf0
+
+# CHECK: umonitor %r13
+0xf3,0x41,0x0f,0xae,0xf5
+
+# CHECK: umwait %r15
+0xf2,0x41,0x0f,0xae,0xf7
+
+# CHECK: tpause %r15
+0x66,0x41,0x0f,0xae,0xf7
Index: test/MC/X86/x86-32-coverage.s
===================================================================
--- test/MC/X86/x86-32-coverage.s
+++ test/MC/X86/x86-32-coverage.s
@@ -10741,3 +10741,14 @@
 // CHECK: encoding: [0xf0,0x01,0x37]
         lock add %esi, (%edi)
 
+// CHECK: umonitor %eax
+// CHECK: encoding: [0xf3,0x0f,0xae,0xf0]
+        umonitor %eax
+
+// CHECK: umwait %eax
+// CHECK: encoding: [0xf2,0x0f,0xae,0xf0]
+        umwait %eax
+
+// CHECK: tpause %eax
+// CHECK: encoding: [0x66,0x0f,0xae,0xf0]
+        tpause %eax
Index: test/MC/X86/x86-64.s
===================================================================
--- test/MC/X86/x86-64.s
+++ test/MC/X86/x86-64.s
@@ -1559,6 +1559,18 @@
 // CHECK: encoding: [0xf3,0x48,0x0f,0xae,0xe0]
 ptwriteq %rax
 
+// CHECK: umonitor %r13
+// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xf5]
+umonitor %r13
+
+// CHECK: umwait %r15
+// CHECK: encoding: [0xf2,0x41,0x0f,0xae,0xf7]
+umwait %r15
+
+// CHECK: tpause %r15
+// CHECK: encoding: [0x66,0x41,0x0f,0xae,0xf7]
+tpause %r15
+
 // __asm __volatile(
 // "pushf    \n\t"
 // "popf     \n\t"