# Declares the umonitor, umwait and tpause intrinsics and binds each one to its
# __builtin_ia32_* GCC builtin. umonitor takes a pointer operand; umwait and
# tpause each take three i32 operands. None of them returns a value.
Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -6421,3 +6421,14 @@
   def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">,
       Intrinsic<[], [llvm_ptr_ty], []>;
 }
+
+//===----------------------------------------------------------------------===//
+// Wait and pause enhancements
+let TargetPrefix = "x86" in {
+  def int_x86_umonitor : GCCBuiltin<"__builtin_ia32_umonitor">,
+            Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_x86_umwait : GCCBuiltin<"__builtin_ia32_umwait">,
+            Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_tpause : GCCBuiltin<"__builtin_ia32_tpause">,
+            Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+}
# Host feature detection: "waitpkg" is reported from bit 5 of ECX, guarded by
# HasLeaf7 like the neighbouring leaf-7 features.
Index: lib/Support/Host.cpp
===================================================================
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -1245,6 +1245,7 @@
   Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
   Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
   Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
+  Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
   Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
   Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
   Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
# Disassembler attribute selection:
#  - The first hunk removes an early "hasAdSize => ATTR_ADSIZE" assignment.
#  - The second hunk extends the absolute-move special case to the two-byte
#    opcode 0xAE (the opcode used by umonitor), so that ATTR_ADSIZE/ATTR_OPSIZE
#    are set from the observed prefixes for that opcode as well.
#  - The third hunk makes the 16-bit inversion always flip ATTR_ADSIZE but flip
#    ATTR_OPSIZE only for the one-byte absolute moves (opcode & 0xFC == 0xA0).
# NOTE(review): the 0xAE test matches every two-byte 0xAE encoding, not only
# umonitor; confirm the other 0xAE instructions decode unchanged.
Index: lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
===================================================================
--- lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -965,8 +965,6 @@
         break;
       }
 
-      if (insn->hasAdSize)
-        attrMask |= ATTR_ADSIZE;
     }
 
     if (insn->rexPrefix & 0x08) {
@@ -1059,13 +1057,14 @@
   }
 
   /*
-   * Absolute moves need special handling.
+   * Absolute moves and umonitor need special handling.
    * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
    *  inverted w.r.t.
    * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
    *  any position.
    */
-  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
+  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
+      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE))) {
     /* Make sure we observed the prefixes in any position. */
     if (insn->hasAdSize)
       attrMask |= ATTR_ADSIZE;
@@ -1073,8 +1072,12 @@
       attrMask |= ATTR_OPSIZE;
 
     /* In 16-bit, invert the attributes. */
-    if (insn->mode == MODE_16BIT)
-      attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
+    if (insn->mode == MODE_16BIT) {
+      attrMask ^= ATTR_ADSIZE;
+      /* The OpSize attribute is only valid with the absolute moves. */
+      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
+        attrMask ^= ATTR_OPSIZE;
+    }
 
     if (getIDWithAttrMask(&instructionID, insn, attrMask))
       return -1;
# Adds the "waitpkg" subtarget feature; enabling it sets HasWAITPKG to true.
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -251,6 +251,8 @@
                                       "Cache Line Write Back">;
 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                     "Support RDPID instructions">;
+def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
+                                      "Wait and pause enhancements">;
 // On some processors, instructions that implicitly take two memory operands are
 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
# Adds the HasWAITPKG predicate and the UMONITOR16/32/64, UMWAIT and TPAUSE
# instruction definitions (all opcode 0xAE, MRM6r, distinguished by prefix).
# UMWAIT and TPAUSE list EFLAGS as an implicit def in addition to their
# implicit EAX/EDX uses.
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -889,6 +889,7 @@
 def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
 def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
 def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
+def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
 def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
@@ -2670,6 +2671,34 @@
 def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
       Requires<[ In64BitMode ]>;
 
+//===----------------------------------------------------------------------===//
+// WAITPKG Instructions
+//
+let SchedRW = [WriteSystem] in {
+  def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src),
+                     "umonitor\t$src", [(int_x86_umonitor GR16:$src)]>,
+                     XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>;
+  def UMONITOR32 : I<0xAE, MRM6r, (outs), (ins GR32:$src),
+                     "umonitor\t$src", [(int_x86_umonitor GR32:$src)]>,
+                     XS, AdSize32, Requires<[HasWAITPKG]>;
+  def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src),
+                     "umonitor\t$src", [(int_x86_umonitor GR64:$src)]>,
+                     XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>;
+  // UMWAIT and TPAUSE implicitly read EAX and EDX and write EFLAGS, so
+  // EFLAGS is listed in Defs to keep flags from being treated as live
+  // across these instructions.
+  let Uses = [ EAX, EDX ], Defs = [ EFLAGS ] in {
+    def UMWAIT : I<0xAE, MRM6r,
+                   (outs), (ins GR32orGR64:$src),
+                   "umwait\t$src", [(int_x86_umwait GR32orGR64:$src, EAX, EDX)]>,
+                   XD, Requires<[HasWAITPKG]>;
+    def TPAUSE : I<0xAE, MRM6r,
+                   (outs), (ins GR32orGR64:$src),
+                   "tpause\t$src", [(int_x86_tpause GR32orGR64:$src, EAX, EDX)]>,
+                   PD, Requires<[HasWAITPKG]>;
+  }
+} // SchedRW
+
 //===----------------------------------------------------------------------===//
 // CLZERO Instruction
 //
# Switches MFENCE from the TB prefix class to PS.
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -3528,7 +3528,7 @@
                TB, Requires<[HasSSE2]>;
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
-               TB, Requires<[HasMFence]>;
+               PS, Requires<[HasMFence]>;
 } // SchedRW
 def : Pat<(X86MFence), (MFENCE)>;
 
# Adds the HasWAITPKG subtarget flag and its hasWAITPKG() accessor.
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -362,6 +362,9 @@
   /// Processor support RDPID instruction
   bool HasRDPID;
 
+  /// Processor supports WaitPKG instructions
+  bool HasWAITPKG;
+
   /// Use a retpoline thunk rather than indirect calls to block speculative
   /// execution.
   bool UseRetpoline;
@@ -621,6 +624,7 @@
   bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
   bool hasCLWB() const { return HasCLWB; }
   bool hasRDPID() const { return HasRDPID; }
+  bool hasWAITPKG() const { return HasWAITPKG; }
   bool useRetpoline() const { return UseRetpoline; }
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
 
# Initializes HasWAITPKG to false alongside the other feature flags.
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -325,6 +325,7 @@
   HasCLFLUSHOPT = false;
   HasCLWB = false;
   HasRDPID = false;
+  HasWAITPKG = false;
   UseRetpoline = false;
   UseRetpolineExternalThunk = false;
   IsPMULLDSlow = false;
# New codegen test: the three intrinsics on i386 with +waitpkg.
Index: test/CodeGen/X86/waitpkg-intrinsics-32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/waitpkg-intrinsics-32.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X32
+
+define void @test_umonitor(i8* %address) {
+; X32-LABEL: test_umonitor:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    umonitor %eax
+; X32-NEXT:    retl
+entry:
+  call void @llvm.x86.umonitor(i8* %address)
+  ret void
+}
+
+define void @test_umwait(i32 %control, i32 %counter_high, i32 %counter_low) {
+; X32-LABEL: test_umwait:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    umwait %ecx
+; X32-NEXT:    retl
+entry:
+  call void @llvm.x86.umwait(i32 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+define void @test_tpause(i32 %control, i32 %counter_high, i32 %counter_low) {
+; X32-LABEL: test_tpause:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    tpause %ecx
+; X32-NEXT:    retl
+entry:
+  call void @llvm.x86.tpause(i32 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+declare void @llvm.x86.umonitor(i8*)
+declare void @llvm.x86.umwait(i32, i32, i32)
+declare void @llvm.x86.tpause(i32, i32, i32)
# New codegen test: the three intrinsics on x86_64 with +waitpkg.
Index: test/CodeGen/X86/waitpkg-intrinsics-64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/waitpkg-intrinsics-64.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X64
+
+define void @test_umonitor(i8* %address) {
+; X64-LABEL: test_umonitor:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    umonitor %rdi
+; X64-NEXT:    retq
+entry:
+  call void @llvm.x86.umonitor(i8* %address)
+  ret void
+}
+
+define void @test_umwait(i32 %control, i32 %counter_high, i32 %counter_low) {
+; X64-LABEL: test_umwait:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    umwait %edi
+; X64-NEXT:    retq
+entry:
+  call void @llvm.x86.umwait(i32 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+define void @test_tpause(i32 %control, i32 %counter_high, i32 %counter_low) {
+; X64-LABEL: test_tpause:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    tpause %edi
+; X64-NEXT:    retq
+entry:
+  call void @llvm.x86.tpause(i32 %control, i32 %counter_high, i32 %counter_low)
+  ret void
+}
+
+declare void @llvm.x86.umonitor(i8*)
+declare void @llvm.x86.umwait(i32, i32, i32)
+declare void @llvm.x86.tpause(i32, i32, i32)
# Disassembler tests: umonitor in 16-bit mode, with and without the 0x67 prefix.
Index: test/MC/Disassembler/X86/x86-16.txt
===================================================================
--- test/MC/Disassembler/X86/x86-16.txt
+++ test/MC/Disassembler/X86/x86-16.txt
@@ -791,3 +791,9 @@
 
 # CHECK: callw -1
 0xe8 0xff 0xff
+
+# CHECK: umonitor %ax
+0xf3 0x0f 0xae 0xf0
+
+# CHECK: umonitor %eax
+0x67 0xf3 0x0f 0xae 0xf0
# Disassembler tests: umonitor, umwait and tpause in 32-bit mode.
Index: test/MC/Disassembler/X86/x86-32.txt
===================================================================
--- test/MC/Disassembler/X86/x86-32.txt
+++ test/MC/Disassembler/X86/x86-32.txt
@@ -820,3 +820,15 @@
 
 # CHECK: ptwritel %eax
 0xf3 0x0f 0xae 0xe0
+
+# CHECK: umonitor %eax
+0xf3 0x0f 0xae 0xf0
+
+# CHECK: umonitor %ax
+0x67 0xf3 0x0f 0xae 0xf0
+
+# CHECK: umwait %eax
+0xf2 0x0f 0xae 0xf0
+
+# CHECK: tpause %eax
+0x66 0x0f 0xae 0xf0
# Disassembler tests: umonitor, umwait and tpause in 64-bit mode.
Index: test/MC/Disassembler/X86/x86-64.txt
===================================================================
--- test/MC/Disassembler/X86/x86-64.txt
+++ test/MC/Disassembler/X86/x86-64.txt
@@ -516,3 +516,18 @@
 
 # CHECK: ptwriteq %rax
 0xf3 0x48 0x0f 0xae 0xe0
+
+# CHECK: umonitor %rax
+0xf3 0x0f 0xae 0xf0
+
+# CHECK: umonitor %eax
+0x67 0xf3 0x0f 0xae 0xf0
+
+# CHECK: umonitor %r13
+0xf3 0x41 0x0f 0xae 0xf5
+
+# CHECK: umwait %r15
+0xf2 0x41 0x0f 0xae 0xf7
+
+# CHECK: tpause %r15
+0x66 0x41 0x0f 0xae 0xf7
# Assembler encoding tests: umonitor in 16-bit mode.
Index: test/MC/X86/x86-16.s
===================================================================
--- test/MC/X86/x86-16.s
+++ test/MC/X86/x86-16.s
@@ -969,3 +969,11 @@
 // CHECK: lgdtw 4(%eax)
 // CHECK:  encoding: [0x67,0x0f,0x01,0x50,0x04]
 data32 lgdt 4(%eax)
+
+// CHECK: umonitor %ax
+// CHECK:  encoding: [0xf3,0x0f,0xae,0xf0]
+umonitor %ax
+
+// CHECK: umonitor %eax
+// CHECK:  encoding: [0x67,0xf3,0x0f,0xae,0xf0]
+umonitor %eax
# Assembler encoding tests: umonitor, umwait and tpause in 32-bit mode.
Index: test/MC/X86/x86-32-coverage.s
===================================================================
--- test/MC/X86/x86-32-coverage.s
+++ test/MC/X86/x86-32-coverage.s
@@ -10741,3 +10741,18 @@
 // CHECK: encoding: [0xf0,0x01,0x37]
         lock add %esi, (%edi)
 
+// CHECK: umonitor %eax
+// CHECK:  encoding: [0xf3,0x0f,0xae,0xf0]
+        umonitor %eax
+
+// CHECK: umonitor %ax
+// CHECK:  encoding: [0x67,0xf3,0x0f,0xae,0xf0]
+        umonitor %ax
+
+// CHECK: umwait %eax
+// CHECK:  encoding: [0xf2,0x0f,0xae,0xf0]
+        umwait %eax
+
+// CHECK: tpause %eax
+// CHECK:  encoding: [0x66,0x0f,0xae,0xf0]
+        tpause %eax
# Assembler encoding tests: umonitor, umwait and tpause in 64-bit mode.
Index: test/MC/X86/x86-64.s
===================================================================
--- test/MC/X86/x86-64.s
+++ test/MC/X86/x86-64.s
@@ -1559,6 +1559,34 @@
 // CHECK:  encoding: [0xf3,0x48,0x0f,0xae,0xe0]
 ptwriteq %rax
 
+// CHECK: umonitor %r13
+// CHECK:  encoding: [0xf3,0x41,0x0f,0xae,0xf5]
+umonitor %r13
+
+// CHECK: umonitor %rax
+// CHECK:  encoding: [0xf3,0x0f,0xae,0xf0]
+umonitor %rax
+
+// CHECK: umonitor %eax
+// CHECK:  encoding: [0x67,0xf3,0x0f,0xae,0xf0]
+umonitor %eax
+
+// CHECK: umwait %r15
+// CHECK:  encoding: [0xf2,0x41,0x0f,0xae,0xf7]
+umwait %r15
+
+// CHECK: umwait %ebx
+// CHECK:  encoding: [0xf2,0x0f,0xae,0xf3]
+umwait %ebx
+
+// CHECK: tpause %r15
+// CHECK:  encoding: [0x66,0x41,0x0f,0xae,0xf7]
+tpause %r15
+
+// CHECK: tpause %ebx
+// CHECK:  encoding: [0x66,0x0f,0xae,0xf3]
+tpause %ebx
+
 // __asm __volatile(
 // "pushf \n\t"
 // "popf \n\t"