Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -6413,3 +6413,15 @@ def int_x86_tpause : GCCBuiltin<"__builtin_ia32_tpause">, Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; } + +//===----------------------------------------------------------------------===// +// Direct Move Instructions + +let TargetPrefix = "x86" in { + def int_x86_directstore32 : GCCBuiltin<"__builtin_ia32_directstore_u32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; + def int_x86_directstore64 : GCCBuiltin<"__builtin_ia32_directstore_u64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; + def int_x86_movdir64b : GCCBuiltin<"__builtin_ia32_movdir64b">, + Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>; +} Index: llvm/trunk/lib/Support/Host.cpp =================================================================== --- llvm/trunk/lib/Support/Host.cpp +++ llvm/trunk/lib/Support/Host.cpp @@ -1261,6 +1261,8 @@ Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); + Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); + Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); Features["ibt"] = HasLeaf7 && ((EDX >> 20) & 1); Index: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp =================================================================== --- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1057,14 +1057,15 @@ } /* - * Absolute moves and umonitor need special handling. + * Absolute moves, umonitor, and movdir64b need special handling. * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are * inverted w.r.t. * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in * any position. */ if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) || - (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE))) { + (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) || + (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) { /* Make sure we observed the prefixes in any position. */ if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; @@ -1074,6 +1075,7 @@ /* In 16-bit, invert the attributes. */ if (insn->mode == MODE_16BIT) { attrMask ^= ATTR_ADSIZE; + /* The OpSize attribute is only valid with the absolute moves. */ if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) attrMask ^= ATTR_OPSIZE; Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -379,6 +379,12 @@ "Enable retpoline, but with an externally provided thunk.", [FeatureRetpoline]>; +// Direct Move instructions. +def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", + "Support movdiri instruction">; +def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true", + "Support movdir64b instruction">; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -629,6 +635,8 @@ ProcIntelTRM, FeatureCLDEMOTE, FeatureGFNI, + FeatureMOVDIRI, + FeatureMOVDIR64B, FeatureRDPID, FeatureSGX, FeatureWAITPKG Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -891,6 +891,8 @@ def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">; def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">; def HasCLDEMOTE : Predicate<"Subtarget->hasCLDEMOTE()">; +def HasMOVDIRI : Predicate<"Subtarget->hasMOVDIRI()">; +def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasMPX : Predicate<"Subtarget->hasMPX()">; @@ -2674,6 +2676,37 @@ } // SchedRW //===----------------------------------------------------------------------===// +// MOVDIRI - Move doubleword/quadword as direct store +// +let SchedRW = [WriteStore] in { +def MOVDIRI32 : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movdiri\t{$src, $dst|$dst, $src}", + [(int_x86_directstore32 addr:$dst, GR32:$src)]>, + T8, Requires<[HasMOVDIRI]>; +def MOVDIRI64 : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movdiri\t{$src, $dst|$dst, $src}", + [(int_x86_directstore64 addr:$dst, GR64:$src)]>, + T8, Requires<[In64BitMode, HasMOVDIRI]>; +} // SchedRW + +//===----------------------------------------------------------------------===// +// MOVDIR64B - Move 64 bytes as direct store +// +let SchedRW = [WriteStore] in { +def MOVDIR64B16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + "movdir64b\t{$src, $dst|$dst, $src}", []>, + T8PD, AdSize16, Requires<[HasMOVDIR64B, Not64BitMode]>; +def MOVDIR64B32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + "movdir64b\t{$src, $dst|$dst, $src}", + [(int_x86_movdir64b GR32:$dst, addr:$src)]>, + T8PD, AdSize32, Requires<[HasMOVDIR64B]>; +def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + "movdir64b\t{$src, $dst|$dst, $src}", + [(int_x86_movdir64b GR64:$dst, addr:$src)]>, + T8PD, AdSize64, Requires<[HasMOVDIR64B, In64BitMode]>; +} // SchedRW + +//===----------------------------------------------------------------------===// // CLZERO Instruction // let SchedRW = [WriteSystem] in { Index: llvm/trunk/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h +++ llvm/trunk/lib/Target/X86/X86Subtarget.h @@ -209,6 +209,12 @@ /// Processor has Cache Line Demote instruction bool HasCLDEMOTE; + /// Processor has MOVDIRI instruction (direct store integer). + bool HasMOVDIRI; + + /// Processor has MOVDIR64B instruction (direct store 64 bytes). + bool HasMOVDIR64B; + /// Processor has Prefetch with intent to Write instruction bool HasPREFETCHWT1; @@ -582,6 +588,8 @@ bool hasMWAITX() const { return HasMWAITX; } bool hasCLZERO() const { return HasCLZERO; } bool hasCLDEMOTE() const { return HasCLDEMOTE; } + bool hasMOVDIRI() const { return HasMOVDIRI; } + bool hasMOVDIR64B() const { return HasMOVDIR64B; } bool isSHLDSlow() const { return IsSHLDSlow; } bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } Index: llvm/trunk/lib/Target/X86/X86Subtarget.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp @@ -317,6 +317,8 @@ HasMWAITX = false; HasCLZERO = false; HasCLDEMOTE = false; + HasMOVDIRI = false; + HasMOVDIR64B = false; HasMPX = false; HasSHSTK = false; HasIBT = false; Index: llvm/trunk/test/CodeGen/X86/movdir-intrinsic-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/movdir-intrinsic-x86.ll +++ llvm/trunk/test/CodeGen/X86/movdir-intrinsic-x86.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X32 + +define void @test_movdiri(i8* %p, i32 %v) { +; X64-LABEL: test_movdiri: +; X64: # %bb.0: # %entry +; X64-NEXT: movdiri %esi, (%rdi) +; X64-NEXT: retq +; +; X32-LABEL: test_movdiri: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movdiri %eax, (%ecx) +; X32-NEXT: retl +entry: + call void @llvm.x86.directstore32(i8* %p, i32 %v) + ret void +} + +declare void @llvm.x86.directstore32(i8*, i32) + +define void @test_movdir64b(i8* %dst, i8* %src) { +; X64-LABEL: test_movdir64b: +; X64: # %bb.0: # %entry +; X64-NEXT: movdir64b (%rsi), %rdi +; X64-NEXT: retq +; +; X32-LABEL: test_movdir64b: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movdir64b (%eax), %ecx +; X32-NEXT: retl +entry: + call void @llvm.x86.movdir64b(i8* %dst, i8* %src) + ret void +} + +declare void @llvm.x86.movdir64b(i8*, i8*) Index: llvm/trunk/test/CodeGen/X86/movdir-intrinsic-x86_64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/movdir-intrinsic-x86_64.ll +++ llvm/trunk/test/CodeGen/X86/movdir-intrinsic-x86_64.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri | FileCheck %s + +define void @test_movdiri(i8* %p, i64 %v) { +; CHECK-LABEL: test_movdiri: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movdiri %rsi, (%rdi) +; CHECK-NEXT: retq +entry: + call void @llvm.x86.directstore64(i8* %p, i64 %v) + ret void +} + +declare void @llvm.x86.directstore64(i8*, i64) Index: llvm/trunk/test/MC/Disassembler/X86/x86-16.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/X86/x86-16.txt +++ llvm/trunk/test/MC/Disassembler/X86/x86-16.txt @@ -800,3 +800,9 @@ # CHECK: umonitor %eax 0x67 0xf3 0x0f 0xae 0xf0 + +#CHECK: movdir64b (%esi), %eax +0x67 0x66 0x0f 0x38 0xf8 0x06 + +#CHECK: movdir64b (%si), %ax +0x66 0x0f 0x38 0xf8 0x04 Index: llvm/trunk/test/MC/Disassembler/X86/x86-32.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/X86/x86-32.txt +++ llvm/trunk/test/MC/Disassembler/X86/x86-32.txt @@ -859,3 +859,15 @@ # CHECK: tpause %eax 0x66 0x0f 0xae 0xf0 + +#CHECK: movdiri %eax, 64(%edx,%edi) +0x0f 0x38 0xf9 0x44 0x3a 0x40 + +#CHECK: movdir64b 485498096, %ecx +0x66 0x0f 0x38 0xf8 0x0d 0xf0 0x1c 0xf0 0x1c + +#CHECK: movdir64b (%esi), %eax +0x66 0x0f 0x38 0xf8 0x06 + +#CHECK: movdir64b (%si), %ax +0x67 0x66 0x0f 0x38 0xf8 0x04 Index: llvm/trunk/test/MC/Disassembler/X86/x86-64.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/X86/x86-64.txt +++ llvm/trunk/test/MC/Disassembler/X86/x86-64.txt @@ -555,3 +555,12 @@ # CHECK: bswapq %rbx 0x48 0x0f 0xcb + +#CHECK: movdiri %r13d, 64(%rdx,%rax,4) +0x44 0x0f 0x38 0xf9 0x6c 0x82 0x40 + +#CHECK: movdir64b 485498096, %rax +0x66 0x0f 0x38 0xf8 0x04 0x25 0xf0 0x1c 0xf0 0x1c + +#CHECK: movdir64b 485498096, %eax +0x67 0x66 0x0f 0x38 0xf8 0x04 0x25 0xf0 0x1c 0xf0 0x1c Index: llvm/trunk/test/MC/X86/x86-16.s =================================================================== --- llvm/trunk/test/MC/X86/x86-16.s +++ llvm/trunk/test/MC/X86/x86-16.s @@ -981,3 +981,11 @@ // CHECK: umonitor %eax // CHECK: encoding: [0x67,0xf3,0x0f,0xae,0xf0] umonitor %eax + +// CHECK: movdir64b (%esi), %eax +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xf8,0x06] +movdir64b (%esi), %eax + +// CHECK: movdir64b (%si), %ax +// CHECK: encoding: [0x66,0x0f,0x38,0xf8,0x04] +movdir64b (%si), %ax Index: llvm/trunk/test/MC/X86/x86-32-coverage.s =================================================================== --- llvm/trunk/test/MC/X86/x86-32-coverage.s +++ llvm/trunk/test/MC/X86/x86-32-coverage.s @@ -10768,3 +10768,27 @@ // CHECK: tpause %eax // CHECK: encoding: [0x66,0x0f,0xae,0xf0] tpause %eax + +// CHECK: movdiri %eax, 64(%edx,%edi) +// CHECK: # encoding: [0x0f,0x38,0xf9,0x44,0x3a,0x40] + movdiri %eax, 64(%edx,%edi) + +// CHECK: movdir64b 485498096, %ecx +// CHECK: # encoding: [0x66,0x0f,0x38,0xf8,0x0d,0xf0,0x1c,0xf0,0x1c] + movdir64b 485498096, %ecx + +// CHECK: movdir64b 485498096, %cx +// CHECK: # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x0d,0xf0,0x1c,0xf0,0x1c] + movdir64b 485498096, %cx + +// CHECK: movdir64b (%edx), %eax +// CHECK: # encoding: [0x66,0x0f,0x38,0xf8,0x02] + movdir64b (%edx), %eax + +// CHECK: movdir64b (%esi), %eax +// CHECK: # encoding: [0x66,0x0f,0x38,0xf8,0x06] + movdir64b (%esi), %eax + +// CHECK: movdir64b (%si), %ax +// CHECK: # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x04] + movdir64b (%si), %ax Index: llvm/trunk/test/MC/X86/x86-64.s =================================================================== --- llvm/trunk/test/MC/X86/x86-64.s +++ llvm/trunk/test/MC/X86/x86-64.s @@ -1595,6 +1595,30 @@ // CHECK: encoding: [0x66,0x0f,0xae,0xf3] tpause %ebx +// CHECK: movdiri %r15, 485498096 +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf9,0x3c,0x25,0xf0,0x1c,0xf0,0x1c] +movdiri %r15, 485498096 + +// CHECK: movdiri %r15, (%rdx) +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf9,0x3a] +movdiri %r15, (%rdx) + +// CHECK: movdiri %r15, 64(%rdx) +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf9,0x7a,0x40] +movdiri %r15, 64(%rdx) + +// CHECK: movdir64b 485498096, %rax +// CHECK: # encoding: [0x66,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c] +movdir64b 485498096, %rax + +// CHECK: movdir64b 485498096, %eax +// CHECK: # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c] +movdir64b 485498096, %eax + +// CHECK: movdir64b (%rdx), %r15 +// CHECK: # encoding: [0x66,0x44,0x0f,0x38,0xf8,0x3a] +movdir64b (%rdx), %r15 + // __asm __volatile( // "pushf \n\t" // "popf \n\t" Index: llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp =================================================================== --- llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp +++ llvm/trunk/utils/TableGen/X86RecognizableInstr.cpp @@ -301,6 +301,8 @@ insnContext = IC_64BIT_XD_OPSIZE; else if (OpSize == X86Local::OpSize16 && OpPrefix == X86Local::XS) insnContext = IC_64BIT_XS_OPSIZE; + else if (AdSize == X86Local::AdSize32 && OpPrefix == X86Local::PD) + insnContext = IC_64BIT_OPSIZE_ADSIZE; else if (OpSize == X86Local::OpSize16 && AdSize == X86Local::AdSize32) insnContext = IC_64BIT_OPSIZE_ADSIZE; else if (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD) @@ -328,6 +330,8 @@ insnContext = IC_XD_ADSIZE; else if (AdSize == X86Local::AdSize16 && OpPrefix == X86Local::XS) insnContext = IC_XS_ADSIZE; + else if (AdSize == X86Local::AdSize16 && OpPrefix == X86Local::PD) + insnContext = IC_OPSIZE_ADSIZE; else if (OpSize == X86Local::OpSize16 && AdSize == X86Local::AdSize16) insnContext = IC_OPSIZE_ADSIZE; else if (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD)