Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -6495,3 +6495,11 @@
       : GCCBuiltin<"__builtin_ia32_mwaitx">,
         Intrinsic<[], [ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], []>;
 }
+
+//===----------------------------------------------------------------------===//
+// Cache-line zero
+let TargetPrefix = "x86" in {
+  def int_x86_clzero
+      : GCCBuiltin<"__builtin_ia32_clzero">,
+        Intrinsic<[], [ llvm_ptr_ty ], []>;
+}
Index: lib/Support/Host.cpp
===================================================================
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -1353,6 +1353,12 @@
   Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
 
+  // CPUID leaf 0x80000008 reports CLZERO support in EBX bit 0.
+  bool HasExtLeaf8 =
+      MaxExtLevel >= 0x80000008 &&
+      !getX86CpuIDAndInfoEx(0x80000008, 0x0, &EAX, &EBX, &ECX, &EDX);
+  Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
+
   bool HasLeaf7 =
       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
 
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -202,6 +202,8 @@
                                        "Support LAHF and SAHF instructions">;
 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                                       "Enable MONITORX/MWAITX timer functionality">;
+def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
+                                      "Enable Cache Line Zero">;
 def FeatureMPX     : SubtargetFeature<"mpx", "HasMPX", "true",
                                       "Support MPX instructions">;
 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
@@ -771,6 +773,7 @@
   FeatureBMI,
   FeatureBMI2,
   FeatureCLFLUSHOPT,
+  FeatureCLZERO,
   FeatureCMPXCHG16B,
   FeatureF16C,
   FeatureFMA,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24539,6 +24539,29 @@
   return BB;
 }
 
+/// Expand the CLZERO pseudo: materialize the memory operand's address into
+/// RAX (EAX outside 64-bit mode) with an LEA, then emit the operand-less
+/// CLZEROr, which reads that register implicitly.
+static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
+                                     const X86Subtarget &Subtarget) {
+  DebugLoc DL = MI->getDebugLoc();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+  // Address into RAX/EAX.
+  const bool Is64Bit = Subtarget.is64Bit();
+  const unsigned LeaOpc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+  const unsigned AddrReg = Is64Bit ? X86::RAX : X86::EAX;
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(LeaOpc), AddrReg);
+  for (int i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.add(MI->getOperand(i));
+
+  // The instruction doesn't actually take any operands though.
+  BuildMI(*BB, MI, DL, TII->get(X86::CLZEROr));
+
+  MI->eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *MBB) const {
@@ -26159,6 +26182,9 @@
     return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
   case X86::MONITORX:
     return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
+  // Cache line zero
+  case X86::CLZERO:
+    return emitClzero(&MI, BB, Subtarget);
   // PKU feature
   case X86::WRPKRU:
     return emitWRPKRU(MI, BB, Subtarget);
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -859,6 +859,7 @@
 def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
 def HasLAHFSAHF  : Predicate<"Subtarget->hasLAHFSAHF()">;
 def HasMWAITX    : Predicate<"Subtarget->hasMWAITX()">;
+def HasCLZERO    : Predicate<"Subtarget->hasCLZERO()">;
 def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
 def HasMPX       : Predicate<"Subtarget->hasMPX()">;
@@ -2455,8 +2456,23 @@
 //===----------------------------------------------------------------------===//
 // CLZERO Instruction
 //
-let Uses = [EAX] in
-def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
+let SchedRW = [ WriteSystem ] in {
+  let Uses = [EAX] in
+  def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", [], IIC_SSE_CLZERO>,
+                TB, Requires<[ HasCLZERO ]>;
+
+  // Pseudo matched from int_x86_clzero; expanded by the custom inserter.
+  let usesCustomInserter = 1 in {
+    def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
+                         [(int_x86_clzero addr:$src1)]>,
+                 Requires<[ HasCLZERO ]>;
+  }
+} // SchedRW
+
+def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>,
+      Requires<[ Not64BitMode ]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>,
+      Requires<[ In64BitMode ]>;
 
 //===----------------------------------------------------------------------===//
 // Pattern fragments to auto generate TBM instructions.
Index: lib/Target/X86/X86Schedule.td
===================================================================
--- lib/Target/X86/X86Schedule.td
+++ lib/Target/X86/X86Schedule.td
@@ -366,6 +366,7 @@
 def IIC_SSE_MONITOR : InstrItinClass;
 def IIC_SSE_MWAITX : InstrItinClass;
 def IIC_SSE_MONITORX : InstrItinClass;
+def IIC_SSE_CLZERO : InstrItinClass;
 
 def IIC_SSE_PREFETCH : InstrItinClass;
 def IIC_SSE_PAUSE : InstrItinClass;
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -169,6 +169,9 @@
   /// Processor has MONITORX/MWAITX instructions.
   bool HasMWAITX;
 
+  /// Processor has Cache Line Zero instruction
+  bool HasCLZERO;
+
   /// Processor has Prefetch with intent to Write instruction
   bool HasPFPREFETCHWT1;
 
@@ -454,6 +457,7 @@
   bool hasRDSEED() const { return HasRDSEED; }
   bool hasLAHFSAHF() const { return HasLAHFSAHF; }
   bool hasMWAITX() const { return HasMWAITX; }
+  bool hasCLZERO() const { return HasCLZERO; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isSHLDSlow() const { return IsSHLDSlow; }
   bool isPMULLDSlow() const { return IsPMULLDSlow; }
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -277,6 +277,7 @@
   HasRDSEED = false;
   HasLAHFSAHF = false;
   HasMWAITX = false;
+  HasCLZERO = false;
   HasMPX = false;
   IsBTMemSlow = false;
   IsPMULLDSlow = false;
Index: test/CodeGen/X86/clzero.ll
===================================================================
--- test/CodeGen/X86/clzero.ll
+++ test/CodeGen/X86/clzero.ll
@@ -0,0 +1,50 @@
+;===----------------------------------------------------------------------===//
+;==== Copyright (c) 2015 Advanced Micro Devices, Inc.  All rights reserved.
+;
+; Developed by: Advanced Micro Devices, Inc.
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy
+; of this software and associated documentation files (the "Software"), to deal
+; with the Software without restriction, including without limitation the
+; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+; sell copies of the Software, and to permit persons to whom the Software is
+; furnished to do so, subject to the following conditions:
+;
+; Redistributions of source code must retain the above copyright notice, this
+; list of conditions and the following disclaimers.
+;
+; Redistributions in binary form must reproduce the above copyright notice,
+; this list of conditions and the following disclaimers in the documentation
+; and/or other materials provided with the distribution.
+;
+; Neither the names of Advanced Micro Devices, Inc., nor the names of its
+; contributors may be used to endorse or promote products derived from this
+; Software without specific prior written permission.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+; CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+; THE SOFTWARE.
+;===----------------------------------------------------------------------===//
+;
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+clzero | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+clzero | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=znver1 | FileCheck %s -check-prefix=WIN64
+
+; CHECK-LABEL: foo:
+; CHECK: leaq (%rdi), %rax
+; CHECK-NEXT: clzero
+; WIN64-LABEL: foo:
+; WIN64: leaq (%rcx), %rax
+; WIN64-NEXT: clzero
+define void @foo(i8* %p) #0 {
+entry:
+  tail call void @llvm.x86.clzero(i8* %p) #1
+  ret void
+}
+
+declare void @llvm.x86.clzero(i8*) #1
Index: test/MC/X86/x86-64.s
===================================================================
--- test/MC/X86/x86-64.s
+++ test/MC/X86/x86-64.s
@@ -1502,6 +1502,10 @@
 // CHECK: encoding: [0x0f,0x01,0xfb]
         mwaitx %rax, %rcx, %rbx
 
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+        clzero
+
 // CHECK: movl %r15d, (%r15,%r15)
 // CHECK: encoding: [0x47,0x89,0x3c,0x3f]
         movl %r15d, (%r15,%r15)