Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -6495,3 +6495,11 @@
       : GCCBuiltin<"__builtin_ia32_mwaitx">,
         Intrinsic<[], [ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], []>;
 }
+
+//===----------------------------------------------------------------------===//
+// Cache-line zero
+let TargetPrefix = "x86" in {
+  def int_x86_clzero
+      : GCCBuiltin<"__builtin_ia32_clzero">,
+        Intrinsic<[], [ llvm_ptr_ty ], []>;
+}
Index: lib/Support/Host.cpp
===================================================================
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -1353,6 +1353,12 @@
   Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
 
+  // CLZERO support is reported in EBX bit 0 of extended leaf 0x80000008.
+  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
+                     !getX86CpuIDAndInfoEx(0x80000008, 0x0, &EAX, &EBX, &ECX,
+                                           &EDX);
+  Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
+
   bool HasLeaf7 = MaxLevel >= 7 &&
                   !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
 
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -202,6 +202,8 @@
                                        "Support LAHF and SAHF instructions">;
 def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                                      "Enable MONITORX/MWAITX timer functionality">;
+def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
+                                     "Enable Cache Line Zero">;
 def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
                                   "Support MPX instructions">;
 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
@@ -771,6 +773,7 @@
   FeatureBMI,
   FeatureBMI2,
   FeatureCLFLUSHOPT,
+  FeatureCLZERO,
   FeatureCMPXCHG16B,
   FeatureF16C,
   FeatureFMA,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24416,6 +24416,26 @@
   return BB;
 }
 
+/// Expand the CLZERO pseudo: load its memory operand's address into RAX/EAX
+/// with an LEA, then emit CLZEROr, which takes no explicit operands.
+static MachineBasicBlock *emitClzero(MachineInstr &MI, MachineBasicBlock *BB,
+                                     const X86Subtarget &Subtarget) {
+  DebugLoc dl = MI.getDebugLoc();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  // Address into RAX/EAX
+  unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
+  unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+  for (int i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.add(MI.getOperand(i));
+
+  // The instruction doesn't actually take any operands though.
+  BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
+
+  MI.eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *MBB) const {
@@ -26036,6 +26056,11 @@
     return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
   case X86::MONITORX:
     return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
+
+  // Cache line zero
+  case X86::CLZERO:
+    return emitClzero(MI, BB, Subtarget);
+
   // PKU feature
   case X86::WRPKRU:
     return emitWRPKRU(MI, BB, Subtarget);
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -859,6 +859,7 @@
 def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
 def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
 def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
+def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
 def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
 def HasMPX : Predicate<"Subtarget->hasMPX()">;
@@ -2455,8 +2456,24 @@
 //===----------------------------------------------------------------------===//
 // CLZERO Instruction
 //
-let Uses = [EAX] in
-def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
+let SchedRW = [ WriteSystem ] in {
+  let Uses = [EAX] in
+  def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", [], IIC_SSE_CLZERO>,
+                TB, Requires<[ HasCLZERO ]>;
+
+  // Pseudo that carries the address operand; the custom inserter expands it
+  // into an LEA of the address into EAX/RAX followed by CLZEROr.
+  let usesCustomInserter = 1 in {
+  def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
+                       [(int_x86_clzero addr:$src1)]>,
+               Requires<[ HasCLZERO ]>;
+  }
+} // SchedRW
+
+def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>,
+      Requires<[ Not64BitMode ]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>,
+      Requires<[ In64BitMode ]>;
 
 //===----------------------------------------------------------------------===//
 // Pattern fragments to auto generate TBM instructions.
Index: lib/Target/X86/X86Schedule.td
===================================================================
--- lib/Target/X86/X86Schedule.td
+++ lib/Target/X86/X86Schedule.td
@@ -366,6 +366,7 @@
 def IIC_SSE_MONITOR : InstrItinClass;
 def IIC_SSE_MWAITX : InstrItinClass;
 def IIC_SSE_MONITORX : InstrItinClass;
+def IIC_SSE_CLZERO : InstrItinClass;
 
 def IIC_SSE_PREFETCH : InstrItinClass;
 def IIC_SSE_PAUSE : InstrItinClass;
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -175,6 +175,9 @@
   /// Processor has MONITORX/MWAITX instructions.
   bool HasMWAITX;
 
+  /// Processor has Cache Line Zero instruction
+  bool HasCLZERO;
+
   /// Processor has Prefetch with intent to Write instruction
   bool HasPFPREFETCHWT1;
 
@@ -466,6 +469,7 @@
   bool hasRDSEED() const { return HasRDSEED; }
   bool hasLAHFSAHF() const { return HasLAHFSAHF; }
   bool hasMWAITX() const { return HasMWAITX; }
+  bool hasCLZERO() const { return HasCLZERO; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isSHLDSlow() const { return IsSHLDSlow; }
   bool isPMULLDSlow() const { return IsPMULLDSlow; }
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -289,6 +289,7 @@
   HasRDSEED = false;
   HasLAHFSAHF = false;
   HasMWAITX = false;
+  HasCLZERO = false;
   HasMPX = false;
   IsBTMemSlow = false;
   IsPMULLDSlow = false;
Index: test/CodeGen/X86/clzero.ll
===================================================================
--- test/CodeGen/X86/clzero.ll
+++ test/CodeGen/X86/clzero.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+clzero | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+clzero | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=znver1 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=i686-unknown-linux -mattr=+clzero | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+clzero | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 -mattr=+clzero | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mtriple=i686-pc-cygwin -mattr=+clzero | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+clzero | FileCheck %s --check-prefix=WIN32
+
+; CHECK-LABEL: foo:
+; CHECK: leaq (%rdi), %rax
+; CHECK-NEXT: clzero
+; WIN64-LABEL: foo:
+; WIN64: leaq (%rcx), %rax
+; WIN64-NEXT: clzero
+; WIN32: leal (%eax), %eax
+; WIN32-NEXT: clzero
+
+define void @foo(i8* %p) #0 {
+entry:
+  tail call void @llvm.x86.clzero(i8* %p) #1
+  ret void
+}
+
+declare void @llvm.x86.clzero(i8*) #1
Index: test/MC/X86/x86-32.s
===================================================================
--- test/MC/X86/x86-32.s
+++ test/MC/X86/x86-32.s
@@ -444,6 +444,14 @@
 // CHECK: encoding: [0x0f,0x21,0xf8]
         movl %dr7,%eax
 
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+        clzero
+
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+        clzero %eax
+
 // radr://8017522
 // CHECK: wait
 // CHECK: encoding: [0x9b]
Index: test/MC/X86/x86-64.s
===================================================================
--- test/MC/X86/x86-64.s
+++ test/MC/X86/x86-64.s
@@ -1502,6 +1502,14 @@
 // CHECK: encoding: [0x0f,0x01,0xfb]
         mwaitx %rax, %rcx, %rbx
 
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+        clzero
+
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+        clzero %rax
+
 // CHECK: movl %r15d, (%r15,%r15)
 // CHECK: encoding: [0x47,0x89,0x3c,0x3f]
         movl %r15d, (%r15,%r15)