Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -6282,3 +6282,14 @@ def int_x86_cldemote : GCCBuiltin<"__builtin_ia32_cldemote">, Intrinsic<[], [llvm_ptr_ty], []>; } + +//===----------------------------------------------------------------------===// +// Wait and pause enhancements +let TargetPrefix = "x86" in { + def int_x86_umonitor : GCCBuiltin<"__builtin_ia32_umonitor">, + Intrinsic<[], [llvm_ptr_ty], []>; + def int_x86_umwait : GCCBuiltin<"__builtin_ia32_umwait">, + Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_tpause : GCCBuiltin<"__builtin_ia32_tpause">, + Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +} Index: llvm/trunk/lib/Support/Host.cpp =================================================================== --- llvm/trunk/lib/Support/Host.cpp +++ llvm/trunk/lib/Support/Host.cpp @@ -1250,6 +1250,7 @@ Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); + Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); Index: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp =================================================================== --- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -965,8 +965,6 @@ break; } - if (insn->hasAdSize) - attrMask |= ATTR_ADSIZE; } if (insn->rexPrefix & 0x08) { @@ -1059,13 +1057,14 @@ } /* - * Absolute moves need special handling. + * Absolute moves and umonitor need special handling. * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are * inverted w.r.t. * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in * any position. */ - if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { + if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) || + (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE))) { /* Make sure we observed the prefixes in any position. */ if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; @@ -1073,8 +1072,12 @@ attrMask |= ATTR_OPSIZE; /* In 16-bit, invert the attributes. */ - if (insn->mode == MODE_16BIT) - attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE; + if (insn->mode == MODE_16BIT) { + attrMask ^= ATTR_ADSIZE; + /* The OpSize attribute is only valid with the absolute moves. */ + if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) + attrMask ^= ATTR_OPSIZE; + } if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -255,6 +255,8 @@ "Write Back No Invalidate">; def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true", "Support RDPID instructions">; +def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true", + "Wait and pause enhancements">; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands // should be avoided in favor of a MOV + register CALL/PUSH/POP. @@ -628,7 +630,8 @@ FeatureCLDEMOTE, FeatureGFNI, FeatureRDPID, - FeatureSGX + FeatureSGX, + FeatureWAITPKG ]>; def : TremontProc<"tremont">; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -594,6 +594,9 @@ // LWP insert record. LWPINS, + // User level wait + UMWAIT, TPAUSE, + // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -21393,17 +21393,35 @@ return SDValue(); } case Intrinsic::x86_lwpins32: - case Intrinsic::x86_lwpins64: { + case Intrinsic::x86_lwpins64: + case Intrinsic::x86_umwait: + case Intrinsic::x86_tpause: { SDLoc dl(Op); SDValue Chain = Op->getOperand(0); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); - SDValue LwpIns = - DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2), + unsigned Opcode; + + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_umwait: + Opcode = X86ISD::UMWAIT; + break; + case Intrinsic::x86_tpause: + Opcode = X86ISD::TPAUSE; + break; + case Intrinsic::x86_lwpins32: + case Intrinsic::x86_lwpins64: + Opcode = X86ISD::LWPINS; + break; + } + + SDValue Operation = + DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), Op->getOperand(3), Op->getOperand(4)); - SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG); + SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG); SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC); return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, - LwpIns.getValue(1)); + Operation.getValue(1)); } } return SDValue(); @@ -25846,6 +25864,8 @@ case X86ISD::GF2P8AFFINEINVQB: return "X86ISD::GF2P8AFFINEINVQB"; case X86ISD::NT_CALL: return "X86ISD::NT_CALL"; case X86ISD::NT_BRIND: return "X86ISD::NT_BRIND"; + case X86ISD::UMWAIT: return "X86ISD::UMWAIT"; + case X86ISD::TPAUSE: return "X86ISD::TPAUSE"; } return nullptr; } Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -307,6 +307,16 @@ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>; +def X86umwait : SDNode<"X86ISD::UMWAIT", + SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, + [SDNPHasChain, SDNPSideEffect]>; + +def X86tpause : SDNode<"X86ISD::TPAUSE", + SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -891,6 +901,7 @@ def HasCLWB : Predicate<"Subtarget->hasCLWB()">; def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">; def HasRDPID : Predicate<"Subtarget->hasRDPID()">; +def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">; @@ -2640,6 +2651,31 @@ Requires<[ In64BitMode ]>; //===----------------------------------------------------------------------===// +// WAITPKG Instructions +// +let SchedRW = [WriteSystem] in { + def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src), + "umonitor\t$src", [(int_x86_umonitor GR16:$src)]>, + XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>; + def UMONITOR32 : I<0xAE, MRM6r, (outs), (ins GR32:$src), + "umonitor\t$src", [(int_x86_umonitor GR32:$src)]>, + XS, AdSize32, Requires<[HasWAITPKG]>; + def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src), + "umonitor\t$src", [(int_x86_umonitor GR64:$src)]>, + XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>; + let Uses = [EAX, EDX], Defs = [EFLAGS] in { + def UMWAIT : I<0xAE, MRM6r, + (outs), (ins GR32orGR64:$src), "umwait\t$src", + [(set EFLAGS, (X86umwait GR32orGR64:$src, EDX, EAX))]>, + XD, Requires<[HasWAITPKG]>; + def TPAUSE : I<0xAE, MRM6r, + (outs), (ins GR32orGR64:$src), "tpause\t$src", + [(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>, + PD, Requires<[HasWAITPKG]>; + } +} // SchedRW + +//===----------------------------------------------------------------------===// // CLZERO Instruction // let SchedRW = [WriteSystem] in { Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -3106,7 +3106,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, - TB, Requires<[HasMFence]>; + PS, Requires<[HasMFence]>; } // SchedRW def : Pat<(X86MFence), (MFENCE)>; Index: llvm/trunk/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h +++ llvm/trunk/lib/Target/X86/X86Subtarget.h @@ -370,6 +370,9 @@ /// Processor support RDPID instruction bool HasRDPID; + /// Processor supports WaitPKG instructions + bool HasWAITPKG; + /// Use a retpoline thunk rather than indirect calls to block speculative /// execution. bool UseRetpoline; @@ -628,6 +631,7 @@ bool hasCLWB() const { return HasCLWB; } bool hasWBNOINVD() const { return HasWBNOINVD; } bool hasRDPID() const { return HasRDPID; } + bool hasWAITPKG() const { return HasWAITPKG; } bool useRetpoline() const { return UseRetpoline; } bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } Index: llvm/trunk/lib/Target/X86/X86Subtarget.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp @@ -325,6 +325,7 @@ HasCLWB = false; HasWBNOINVD = false; HasRDPID = false; + HasWAITPKG = false; UseRetpoline = false; UseRetpolineExternalThunk = false; IsPMULLDSlow = false; Index: llvm/trunk/test/CodeGen/X86/waitpkg-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/waitpkg-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/waitpkg-intrinsics.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X32 + +define void @test_umonitor(i8* %address) { +; X64-LABEL: test_umonitor: +; X64: # %bb.0: # %entry +; X64-NEXT: umonitor %rdi +; X64-NEXT: retq +; +; X32-LABEL: test_umonitor: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: umonitor %eax +; X32-NEXT: retl +entry: + call void @llvm.x86.umonitor(i8* %address) + ret void +} + +define i8 @test_umwait(i32 %control, i32 %counter_high, i32 %counter_low) { +; X64-LABEL: test_umwait: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edx, %eax +; X64-NEXT: movl %esi, %edx +; X64-NEXT: umwait %edi +; X64-NEXT: setb %al +; X64-NEXT: retq +; +; X32-LABEL: test_umwait: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: umwait %ecx +; X32-NEXT: setb %al +; X32-NEXT: retl +entry: + call i8 @llvm.x86.umwait(i32 %control, i32 %counter_high, i32 %counter_low) + ret i8 %0 +} + +define i8 @test_tpause(i32 %control, i32 %counter_high, i32 %counter_low) { +; X64-LABEL: test_tpause: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edx, %eax +; X64-NEXT: movl %esi, %edx +; X64-NEXT: tpause %edi +; X64-NEXT: setb %al +; X64-NEXT: retq +; +; X32-LABEL: test_tpause: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: tpause %ecx +; X32-NEXT: setb %al +; X32-NEXT: retl +entry: + call i8 @llvm.x86.tpause(i32 %control, i32 %counter_high, i32 %counter_low) + ret i8 %0 +} + +declare void @llvm.x86.umonitor(i8*) +declare i8 @llvm.x86.umwait(i32, i32, i32) +declare i8 @llvm.x86.tpause(i32, i32, i32) Index: llvm/trunk/test/MC/Disassembler/X86/x86-16.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/X86/x86-16.txt +++ llvm/trunk/test/MC/Disassembler/X86/x86-16.txt @@ -794,3 +794,9 @@ # CHECK: wbnoinvd 0xf3 0x0f 0x09 + +# CHECK: umonitor %ax +0xf3 0x0f 0xae 0xf0 + +# CHECK: umonitor %eax +0x67 0xf3 0x0f 0xae 0xf0 Index: llvm/trunk/test/MC/Disassembler/X86/x86-32.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/X86/x86-32.txt +++ llvm/trunk/test/MC/Disassembler/X86/x86-32.txt @@ -847,3 +847,15 @@ 0x0f 0xb7 0x00 # CHECK: movzww (%eax), %ax 0x66 0x0f 0xb7 0x00 + +# CHECK: umonitor %eax +0xf3 0x0f 0xae 0xf0 + +# CHECK: umonitor %ax +0x67 0xf3 0x0f 0xae 0xf0 + +# CHECK: umwait %eax +0xf2 0x0f 0xae 0xf0 + +# CHECK: tpause %eax +0x66 0x0f 0xae 0xf0 Index: llvm/trunk/test/MC/Disassembler/X86/x86-64.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/X86/x86-64.txt +++ llvm/trunk/test/MC/Disassembler/X86/x86-64.txt @@ -525,3 +525,18 @@ # CHECK: cldemote -559038737(%rbx,%rcx,8) 0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde + +# CHECK: umonitor %rax +0xf3 0x0f 0xae 0xf0 + +# CHECK: umonitor %eax +0x67 0xf3 0x0f 0xae 0xf0 + +# CHECK: umonitor %r13 +0xf3 0x41 0x0f 0xae 0xf5 + +# CHECK: umwait %r15 +0xf2 0x41 0x0f 0xae 0xf7 + +# CHECK: tpause %r15 +0x66 0x41 0x0f 0xae 0xf7 Index: llvm/trunk/test/MC/X86/x86-16.s =================================================================== --- llvm/trunk/test/MC/X86/x86-16.s +++ llvm/trunk/test/MC/X86/x86-16.s @@ -973,3 +973,11 @@ // CHECK: wbnoinvd // CHECK: encoding: [0xf3,0x0f,0x09] wbnoinvd + +// CHECK: umonitor %ax +// CHECK: encoding: [0xf3,0x0f,0xae,0xf0] +umonitor %ax + +// CHECK: umonitor %eax +// CHECK: encoding: [0x67,0xf3,0x0f,0xae,0xf0] +umonitor %eax Index: llvm/trunk/test/MC/X86/x86-32-coverage.s =================================================================== --- llvm/trunk/test/MC/X86/x86-32-coverage.s +++ llvm/trunk/test/MC/X86/x86-32-coverage.s @@ -10752,3 +10752,19 @@ // CHECK: cldemote 3735928559(%ebx,%ecx,8) // CHECK: encoding: [0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde] cldemote 0xdeadbeef(%ebx,%ecx,8) + +// CHECK: umonitor %eax +// CHECK: encoding: [0xf3,0x0f,0xae,0xf0] + umonitor %eax + +// CHECK: umonitor %ax +// CHECK: encoding: [0x67,0xf3,0x0f,0xae,0xf0] + umonitor %ax + +// CHECK: umwait %eax +// CHECK: encoding: [0xf2,0x0f,0xae,0xf0] + umwait %eax + +// CHECK: tpause %eax +// CHECK: encoding: [0x66,0x0f,0xae,0xf0] + tpause %eax Index: llvm/trunk/test/MC/X86/x86-64.s =================================================================== --- llvm/trunk/test/MC/X86/x86-64.s +++ llvm/trunk/test/MC/X86/x86-64.s @@ -1571,6 +1571,34 @@ // CHECK: encoding: [0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde] cldemote 0xdeadbeef(%rbx,%rcx,8) +// CHECK: umonitor %r13 +// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xf5] +umonitor %r13 + +// CHECK: umonitor %rax +// CHECK: encoding: [0xf3,0x0f,0xae,0xf0] +umonitor %rax + +// CHECK: umonitor %eax +// CHECK: encoding: [0x67,0xf3,0x0f,0xae,0xf0] +umonitor %eax + +// CHECK: umwait %r15 +// CHECK: encoding: [0xf2,0x41,0x0f,0xae,0xf7] +umwait %r15 + +// CHECK: umwait %ebx +// CHECK: encoding: [0xf2,0x0f,0xae,0xf3] +umwait %ebx + +// CHECK: tpause %r15 +// CHECK: encoding: [0x66,0x41,0x0f,0xae,0xf7] +tpause %r15 + +// CHECK: tpause %ebx +// CHECK: encoding: [0x66,0x0f,0xae,0xf3] +tpause %ebx + // __asm __volatile( // "pushf \n\t" // "popf \n\t"