diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -550,6 +550,7 @@ -------------------- - Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk. - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC. +- Add support for ``PREFETCHI`` instructions. DWARF Support in Clang ---------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8432,6 +8432,8 @@ "too many %select{|||execution configuration }0arguments to " "%select{function|block|method|kernel function}0 call, " "expected at most %1, have %2; did you mean %3?">; +def err_typecheck_call_inst_cache_must_read_only : Error< + "instruction cache must be read only">; def err_arc_typecheck_convert_incompatible_pointer : Error< "incompatible pointer types passing retainable parameter of type %0" diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4648,6 +4648,8 @@ def mno_pconfig : Flag<["-"], "mno-pconfig">, Group; def mpopcnt : Flag<["-"], "mpopcnt">, Group; def mno_popcnt : Flag<["-"], "mno-popcnt">, Group; +def mprefetchi : Flag<["-"], "mprefetchi">, Group; +def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group; def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group; def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group; def mprfchw : Flag<["-"], "mprfchw">, Group; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -123,6 +123,7 @@ bool HasCLFLUSHOPT = false; bool HasCLWB = false; bool HasMOVBE = false; + bool HasPREFETCHI = false; bool HasPREFETCHWT1 = false; bool 
HasRDPID = false; bool HasRDPRU = false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -290,6 +290,8 @@ HasCLWB = true; } else if (Feature == "+wbnoinvd") { HasWBNOINVD = true; + } else if (Feature == "+prefetchi") { + HasPREFETCHI = true; } else if (Feature == "+prefetchwt1") { HasPREFETCHWT1 = true; } else if (Feature == "+clzero") { @@ -738,6 +740,8 @@ Builder.defineMacro("__SHSTK__"); if (HasSGX) Builder.defineMacro("__SGX__"); + if (HasPREFETCHI) + Builder.defineMacro("__PREFETCHI__"); if (HasPREFETCHWT1) Builder.defineMacro("__PREFETCHWT1__"); if (HasCLZERO) @@ -929,6 +933,7 @@ .Case("pconfig", true) .Case("pku", true) .Case("popcnt", true) + .Case("prefetchi", true) .Case("prefetchwt1", true) .Case("prfchw", true) .Case("ptwrite", true) @@ -1025,6 +1030,7 @@ .Case("pconfig", HasPCONFIG) .Case("pku", HasPKU) .Case("popcnt", HasPOPCNT) + .Case("prefetchi", HasPREFETCHI) .Case("prefetchwt1", HasPREFETCHWT1) .Case("prfchw", HasPRFCHW) .Case("ptwrite", HasPTWRITE) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2917,13 +2917,14 @@ /*EmittedE=*/nullptr, IsDynamic)); } case Builtin::BI__builtin_prefetch: { - Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); + Value *Locality, *RW, *Data, *Address = EmitScalarExpr(E->getArg(0)); // FIXME: Technically these constants should of type 'int', yes? RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : llvm::ConstantInt::get(Int32Ty, 0); Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 3); - Value *Data = llvm::ConstantInt::get(Int32Ty, 1); + Data = (E->getNumArgs() > 3) ? 
EmitScalarExpr(E->getArg(3)) : + llvm::ConstantInt::get(Int32Ty, 1); Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); } diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -174,6 +174,7 @@ pkuintrin.h pmmintrin.h popcntintrin.h + prfchiintrin.h prfchwintrin.h ptwriteintrin.h rdpruintrin.h diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -204,6 +204,9 @@ #define bit_AVX512BF16 0x00000020 #define bit_HRESET 0x00400000 +/* Features in %edx for leaf 7 sub-leaf 1 */ +#define bit_PREFETCHI 0x00004000 + /* Features in %eax for leaf 13 sub-leaf 1 */ #define bit_XSAVEOPT 0x00000001 #define bit_XSAVEC 0x00000002 diff --git a/clang/lib/Headers/prfchiintrin.h b/clang/lib/Headers/prfchiintrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/prfchiintrin.h @@ -0,0 +1,56 @@ +/*===---- prfchiintrin.h - PREFETCHI intrinsic -----------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __PRFCHIINTRIN_H +#define __PRFCHIINTRIN_H + +#ifdef __x86_64__ + +/// Loads an instruction sequence containing the specified memory address into +/// all level cache. +/// +/// Note that the effect of this intrinsic is dependent on the processor +/// implementation. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c PREFETCHIT0 instruction. +/// +/// \param __P +/// A pointer specifying the memory address to be prefetched. 
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_prefetchit0(volatile const void *__P) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+  __builtin_prefetch((const void *)__P, 0, 3 /* _MM_HINT_T0 */, 0 /* inst */);
+#pragma clang diagnostic pop
+}
+
+/// Loads an instruction sequence containing the specified memory address into
+/// all but the first-level cache.
+///
+/// Note that the effect of this intrinsic is dependent on the processor
+/// implementation.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PREFETCHIT1 instruction.
+///
+/// \param __P
+///    A pointer specifying the memory address to be prefetched.
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_prefetchit1(volatile const void *__P) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+  __builtin_prefetch((const void *)__P, 0, 2 /* _MM_HINT_T1 */, 0 /* inst */);
+#pragma clang diagnostic pop
+}
+#endif /* __x86_64__ */
+
+#endif /* __PRFCHIINTRIN_H */
diff --git a/clang/lib/Headers/x86gprintrin.h b/clang/lib/Headers/x86gprintrin.h
--- a/clang/lib/Headers/x86gprintrin.h
+++ b/clang/lib/Headers/x86gprintrin.h
@@ -25,6 +25,11 @@
 #include <crc32intrin.h>
 #endif
 
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__PREFETCHI__)
+#include <prfchiintrin.h>
+#endif
+
 #if defined(__i386__)
 #define __SAVE_GPRBX "mov {%%ebx, %%eax |eax, ebx};"
 #define __RESTORE_GPRBX "mov {%%eax, %%ebx |ebx, eax};"
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7571,17 +7571,54 @@
 bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
   unsigned NumArgs = TheCall->getNumArgs();
 
-  if (NumArgs > 3)
+  if (NumArgs > 4)
     return Diag(TheCall->getEndLoc(),
                 diag::err_typecheck_call_too_many_args_at_most)
-           << 0 /*function call*/ << 3 << NumArgs << TheCall->getSourceRange();
+           << 0 /*function call*/ << 4 << NumArgs << TheCall->getSourceRange();
+
+  // Validates that argument ArgNum is an integer constant in [Low, High] and
+  // stores it in Val. Returns true when the argument is rejected.
+  auto CheckConstantArgRange =
+      [this, TheCall](int ArgNum, int Low, int High, int &Val) -> bool {
+    if (isConstantEvaluated())
+      return false;
+    llvm::APSInt Result;
+
+    // We can't check the value of a dependent argument.
+    Expr *Arg = TheCall->getArg(ArgNum);
+    if (Arg->isTypeDependent() || Arg->isValueDependent())
+      return false;
+
+    // Check constant-ness first.
+    if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
+      return true;
+
+    // Compare at full width: narrowing to int before the check would let an
+    // oversized constant (e.g. 1LL << 32) wrap into the valid range.
+    int64_t Wide = Result.getSExtValue();
+    if (Wide < Low || Wide > High)
+      return Diag(TheCall->getBeginLoc(), diag::err_argument_invalid_range)
+             << toString(Result, 10) << Low << High << Arg->getSourceRange();
+
+    Val = static_cast<int>(Wide);
+    return false;
+  };
 
   // Argument 0 is checked for us and the remaining arguments must be
   // constant integers.
+  // Defaults for the rw, locality and cache-type slots; only rw (Vals[0]) and
+  // cache type (Vals[2]) are read after the loop.
+  int Vals[3] = {0, 0, 1};
   for (unsigned i = 1; i != NumArgs; ++i)
-    if (SemaBuiltinConstantArgRange(TheCall, i, 0, i == 1 ? 1 : 3))
+    if (CheckConstantArgRange(i, 0, i == 2 ? 3 : 1, Vals[i - 1]))
       return true;
 
+  // An instruction-cache prefetch (cache type 0) must not be a write (rw 1).
+  if (Vals[0] == 1 && Vals[2] == 0)
+    return Diag(TheCall->getEndLoc(),
+                diag::err_typecheck_call_inst_cache_must_read_only)
+           << TheCall->getSourceRange();
+
   return false;
 }
 
diff --git a/clang/test/CodeGen/X86/prefetchi-builtins.c b/clang/test/CodeGen/X86/prefetchi-builtins.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/X86/prefetchi-builtins.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -target-feature +prefetchi -emit-llvm -o - %s | FileCheck %s
+
+
+#include <x86intrin.h>
+
+void test_m_prefetch_it0(void *p) {
+  return _m_prefetchit0(p);
+  // CHECK-LABEL: define{{.*}} void @test_m_prefetch_it0
+  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 0)
+}
+
+void test_m_prefetch_it1(void *p) {
+  return _m_prefetchit1(p);
+  // CHECK-LABEL: define{{.*}} void @test_m_prefetch_it1
+  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 2, i32 0)
+}
diff --git a/clang/test/CodeGen/builtins-arm.c
b/clang/test/CodeGen/builtins-arm.c --- a/clang/test/CodeGen/builtins-arm.c +++ b/clang/test/CodeGen/builtins-arm.c @@ -97,8 +97,8 @@ __builtin_arm_prefetch(&i, 1, 1); // CHECK: call {{.*}} @llvm.prefetch.p0(ptr %{{.*}}, i32 1, i32 3, i32 1) - __builtin_arm_prefetch(&i, 1, 0); - // CHECK: call {{.*}} @llvm.prefetch.p0(ptr %{{.*}}, i32 1, i32 3, i32 0) + __builtin_arm_prefetch(&i, 0, 0); + // CHECK: call {{.*}} @llvm.prefetch.p0(ptr %{{.*}}, i32 0, i32 3, i32 0) } void ldc(const void *i) { diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -91,6 +91,11 @@ // PREFETCHWT1: "-target-feature" "+prefetchwt1" // NO-PREFETCHWT1: "-target-feature" "-prefetchwt1" +// RUN: %clang --target=i386 -march=i386 -mprefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHI %s +// RUN: %clang --target=i386 -march=i386 -mno-prefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHI %s +// PREFETCHI: "-target-feature" "+prefetchi" +// NO-PREFETCHI: "-target-feature" "-prefetchi" + // RUN: %clang --target=i386 -march=i386 -mclzero %s -### 2>&1 | FileCheck -check-prefix=CLZERO %s // RUN: %clang --target=i386 -march=i386 -mno-clzero %s -### 2>&1 | FileCheck -check-prefix=NO-CLZERO %s // CLZERO: "-target-feature" "+clzero" diff --git a/clang/test/Sema/builtin-prefetch.c b/clang/test/Sema/builtin-prefetch.c --- a/clang/test/Sema/builtin-prefetch.c +++ b/clang/test/Sema/builtin-prefetch.c @@ -4,11 +4,13 @@ int a; __builtin_prefetch(&a); __builtin_prefetch(&a, 1); - __builtin_prefetch(&a, 1, 2); - __builtin_prefetch(&a, 1, 9, 3); // expected-error{{too many arguments to function}} + __builtin_prefetch(&a, 0, 2, 1); + __builtin_prefetch(&a, 1, 2, 0); // expected-error{{instruction cache must be read only}} + __builtin_prefetch(&a, 1, 9, 8, 3); // expected-error{{too many arguments to function}} __builtin_prefetch(&a, "hello", 2); // 
expected-error{{argument to '__builtin_prefetch' must be a constant integer}}
   __builtin_prefetch(&a, a, 2); // expected-error{{argument to '__builtin_prefetch' must be a constant integer}}
   __builtin_prefetch(&a, 2); // expected-error{{argument value 2 is outside the valid range [0, 1]}}
   __builtin_prefetch(&a, 0, 4); // expected-error{{argument value 4 is outside the valid range [0, 3]}}
   __builtin_prefetch(&a, -1, 4); // expected-error{{argument value -1 is outside the valid range [0, 1]}}
+  __builtin_prefetch(&a, 1, 2, 3); // expected-error{{argument value 3 is outside the valid range [0, 1]}}
 }
diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ b/llvm/include/llvm/Support/X86TargetParser.def
@@ -174,6 +174,7 @@
 X86_FEATURE       (MWAITX,          "mwaitx")
 X86_FEATURE       (PCONFIG,         "pconfig")
 X86_FEATURE       (PKU,             "pku")
+X86_FEATURE       (PREFETCHI,       "prefetchi")
 X86_FEATURE       (PREFETCHWT1,     "prefetchwt1")
 X86_FEATURE       (PRFCHW,          "prfchw")
 X86_FEATURE       (PTWRITE,         "ptwrite")
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5175,14 +5175,16 @@
           "llvm.init_trampoline parameter #2 must resolve to a function.",
           Call);
     break;
-  case Intrinsic::prefetch:
-    Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
-          "rw argument to llvm.prefetch must be 0-1", Call);
-    Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
-          "locality argument to llvm.prefetch must be 0-4", Call);
-    Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
-          "cache type argument to llvm.prefetch must be 0-1", Call);
+  case Intrinsic::prefetch: {
+    uint64_t RW = cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue();
+    uint64_t Locality = cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue();
+    uint64_t Data = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
+    Check(RW < 2, "rw argument to llvm.prefetch must be 0-1", Call);
+    Check(Locality < 4, "locality argument to llvm.prefetch must be 0-3", Call);
+    Check(Data < 2, "cache type argument to llvm.prefetch must be 0-1", Call);
+    Check(Data != 0 || RW != 1, "instruction cache must be read only", Call);
     break;
+  }
   case Intrinsic::stackprotector:
     Check(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
           "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1808,6 +1808,7 @@
   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
+  Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
 
   bool HasLeafD = MaxLevel >= 0xd &&
                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -581,6 +581,7 @@
 constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
 constexpr FeatureBitset ImpliedFeaturesHRESET = {};
 
+static constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
 static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
     FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
 // Key Locker Features
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -134,6 +134,9 @@
 def FeaturePFI     : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                       [FeatureAVX512]>;
+def FeaturePREFETCHI  : SubtargetFeature<"prefetchi", "HasPREFETCHI",
+                                   "true",
+                                   "Prefetch instruction with T0 or T1 Hint">;
 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
                                    "true",
                                    "Prefetch with Intent to Write and T1 Hint">;
diff --git a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
--- a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -73,7 +73,8 @@
 
 bool IsPrefetchOpcode(unsigned Opcode) {
   return Opcode == X86::PREFETCHNTA || Opcode == X86::PREFETCHT0 ||
-         Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2;
+         Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2 ||
+         Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1;
 }
 } // end anonymous namespace
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36832,6 +36832,23 @@
     MI.eraseFromParent(); // The pseudo is gone now.
     return BB;
   }
+  case X86::PREFETCHIT0:
+  case X86::PREFETCHIT1: {
+    // Only the RIP-relative form is kept as PREFETCHIT*; every other addressing
+    // mode is rewritten to the data prefetch with the same locality hint.
+    unsigned Opc =
+        MI.getOpcode() == X86::PREFETCHIT0 ? X86::PREFETCHT0 : X86::PREFETCHT1;
+    // The base may be a frame index rather than a register, so test isReg()
+    // before calling getReg(), which asserts on non-register operands.
+    const MachineOperand &Base = MI.getOperand(X86::AddrBaseReg);
+    if (!Base.isReg() || Base.getReg() != X86::RIP) {
+      MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+      for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
+        MIB.add(MI.getOperand(Idx));
+      MI.eraseFromParent();
+    }
+    return BB;
+  }
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td
--- a/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -93,7 +93,7 @@
 let Predicates = [Has3DNow, NoSSEPrefetch] in
 def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
                       "prefetch\t$addr",
-                      [(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;
+                      [(prefetch addr:$addr, imm, imm, (i32 imm))]>, TB;
 
 def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
                   [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -956,6 +956,7 @@
 def HasRDSEED    : Predicate<"Subtarget->hasRDSEED()">;
 def HasSSEPrefetch :
Predicate<"Subtarget->hasSSEPrefetch()">; def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">; +def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">; def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">; def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">; def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; @@ -2998,6 +2999,17 @@ [(set EFLAGS, (X86testui))]>, XS; } +//===----------------------------------------------------------------------===// +// PREFETCHIT0 and PREFETCHIT1 Instructions +// prefetch ADDR, RW, Locality, Data +let Predicates = [HasPREFETCHI, In64BitMode], SchedRW = [WriteLoad], + usesCustomInserter = 1 in { + def PREFETCHIT0 : I<0x18, MRM7m, (outs), (ins i8mem:$src), + "prefetchit0\t$src", [(prefetch addr:$src, (i32 0), (i32 3), (i32 0))]>, TB; + def PREFETCHIT1 : I<0x18, MRM6m, (outs), (ins i8mem:$src), + "prefetchit1\t$src", [(prefetch addr:$src, (i32 0), (i32 2), (i32 0))]>, TB; +} + //===----------------------------------------------------------------------===// // Pattern fragments to auto generate TBM instructions. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3201,13 +3201,13 @@ // Prefetch intrinsic. 
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), - "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB; + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), imm)]>, TB; def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src), - "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB; + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), imm)]>, TB; def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src), - "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB; + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), imm)]>, TB; def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), - "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB; + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), imm)]>, TB; } // FIXME: How should flush instruction be modeled? diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -221,7 +221,8 @@ // We implicitly enable these when we have a write prefix supporting cache // level OR if we have prfchw, but don't already have a read prefetch from // 3dnow. - return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1(); + return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() || + hasPREFETCHI(); } bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); } // These are generic getters that OR together all of the thunk types diff --git a/llvm/test/CodeGen/SystemZ/prefetch-01.ll b/llvm/test/CodeGen/SystemZ/prefetch-01.ll --- a/llvm/test/CodeGen/SystemZ/prefetch-01.ll +++ b/llvm/test/CodeGen/SystemZ/prefetch-01.ll @@ -15,14 +15,11 @@ ret void } -; Check that instruction write prefetches are ignored. 
-define dso_local void @f2(ptr %ptr) { -; CHECK-LABEL: f2: -; CHECK-NOT: %r2 -; CHECK: br %r14 - call void @llvm.prefetch(ptr %ptr, i32 1, i32 0, i32 0) - ret void -} +; Instruction write prefetches are invalid. +; define dso_local void @f2(ptr %ptr) { +; call void @llvm.prefetch(ptr %ptr, i32 1, i32 0, i32 0) +; ret void +; } ; Check data read prefetches. define dso_local void @f3(ptr %ptr) { diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll --- a/llvm/test/CodeGen/X86/prefetch.ll +++ b/llvm/test/CodeGen/X86/prefetch.ll @@ -11,6 +11,8 @@ ; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=3DNOW +; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchi | FileCheck %s -check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-- -mattr=+prefetchi | FileCheck %s -check-prefix=PREFETCHI ; Rules: ; 3dnow by itself get you just the single prefetch instruction with no hints @@ -21,7 +23,7 @@ ; rdar://10538297 -define void @t(ptr %ptr) nounwind { +define dso_local void @t(ptr %ptr) nounwind { ; SSE-LABEL: t: ; SSE: # %bb.0: # %entry ; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -33,6 +35,10 @@ ; SSE-NEXT: prefetcht1 (%eax) ; SSE-NEXT: prefetcht0 (%eax) ; SSE-NEXT: prefetchnta (%eax) +; SSE-NEXT: prefetcht1 (%eax) +; SSE-NEXT: prefetcht0 (%eax) +; SSE-NEXT: prefetcht1 t +; SSE-NEXT: prefetcht0 ext ; SSE-NEXT: retl ; ; PRFCHWSSE-LABEL: t: @@ -46,6 +52,10 @@ ; PRFCHWSSE-NEXT: prefetchw (%eax) ; PRFCHWSSE-NEXT: prefetchw (%eax) ; PRFCHWSSE-NEXT: prefetchw (%eax) +; PRFCHWSSE-NEXT: prefetcht1 (%eax) +; PRFCHWSSE-NEXT: prefetcht0 (%eax) +; PRFCHWSSE-NEXT: prefetcht1 t +; PRFCHWSSE-NEXT: prefetcht0 ext ; PRFCHWSSE-NEXT: retl ; ; PREFETCHWT1-LABEL: t: @@ -59,6 +69,10 @@ ; PREFETCHWT1-NEXT: prefetchwt1 (%eax) ; PREFETCHWT1-NEXT: prefetchw (%eax) ; 
PREFETCHWT1-NEXT: prefetchwt1 (%eax) +; PREFETCHWT1-NEXT: prefetcht1 (%eax) +; PREFETCHWT1-NEXT: prefetcht0 (%eax) +; PREFETCHWT1-NEXT: prefetcht1 t +; PREFETCHWT1-NEXT: prefetcht0 ext ; PREFETCHWT1-NEXT: retl ; ; 3DNOW-LABEL: t: @@ -72,7 +86,27 @@ ; 3DNOW-NEXT: prefetchw (%eax) ; 3DNOW-NEXT: prefetchw (%eax) ; 3DNOW-NEXT: prefetchw (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch t +; 3DNOW-NEXT: prefetch ext ; 3DNOW-NEXT: retl +; +; PREFETCHI-LABEL: t: +; PREFETCHI: # %bb.0: # %entry +; PREFETCHI-NEXT: prefetcht2 (%rdi) +; PREFETCHI-NEXT: prefetcht1 (%rdi) +; PREFETCHI-NEXT: prefetcht0 (%rdi) +; PREFETCHI-NEXT: prefetchnta (%rdi) +; PREFETCHI-NEXT: prefetcht2 (%rdi) +; PREFETCHI-NEXT: prefetcht1 (%rdi) +; PREFETCHI-NEXT: prefetcht0 (%rdi) +; PREFETCHI-NEXT: prefetchnta (%rdi) +; PREFETCHI-NEXT: prefetcht1 (%rdi) +; PREFETCHI-NEXT: prefetcht0 (%rdi) +; PREFETCHI-NEXT: prefetchit1 t(%rip) +; PREFETCHI-NEXT: prefetchit0 ext(%rip) +; PREFETCHI-NEXT: retq entry: tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 2, i32 1 ) @@ -82,7 +116,12 @@ tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 2, i32 1 ) tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 3, i32 1 ) tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 0, i32 1 ) + tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 2, i32 0 ) + tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 3, i32 0 ) + tail call void @llvm.prefetch( ptr @t, i32 0, i32 2, i32 0 ) + tail call void @llvm.prefetch( ptr @ext, i32 0, i32 3, i32 0 ) ret void } +declare dso_local void @ext() nounwind declare void @llvm.prefetch(ptr, i32, i32, i32) nounwind diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt --- a/llvm/test/MC/Disassembler/X86/x86-64.txt +++ b/llvm/test/MC/Disassembler/X86/x86-64.txt @@ -761,3 +761,9 @@ # CHECK: rdpru 0x0f,0x01,0xfd + +# CHECK: prefetchit0 (%rip) 
+0x0f,0x18,0x3d,0x00,0x00,0x00,0x00 + +# CHECK: prefetchit1 (%rip) +0x0f,0x18,0x35,0x00,0x00,0x00,0x00 diff --git a/llvm/test/MC/X86/PREFETCH-64.s b/llvm/test/MC/X86/PREFETCH-64.s --- a/llvm/test/MC/X86/PREFETCH-64.s +++ b/llvm/test/MC/X86/PREFETCH-64.s @@ -168,3 +168,50 @@ // CHECK: encoding: [0x0f,0x0d,0x12] prefetchwt1 (%rdx) +// CHECK: prefetchit0 485498096 +// CHECK: encoding: [0x0f,0x18,0x3c,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchit0 485498096 + +// CHECK: prefetchit0 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x7a,0x40] +prefetchit0 64(%rdx) + +// CHECK: prefetchit0 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x7c,0x82,0x40] +prefetchit0 64(%rdx,%rax,4) + +// CHECK: prefetchit0 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x7c,0x82,0xc0] +prefetchit0 -64(%rdx,%rax,4) + +// CHECK: prefetchit0 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x7c,0x02,0x40] +prefetchit0 64(%rdx,%rax) + +// CHECK: prefetchit0 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x3a] +prefetchit0 (%rdx) + +// CHECK: prefetchit1 485498096 +// CHECK: encoding: [0x0f,0x18,0x34,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchit1 485498096 + +// CHECK: prefetchit1 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x72,0x40] +prefetchit1 64(%rdx) + +// CHECK: prefetchit1 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x74,0x82,0x40] +prefetchit1 64(%rdx,%rax,4) + +// CHECK: prefetchit1 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x74,0x82,0xc0] +prefetchit1 -64(%rdx,%rax,4) + +// CHECK: prefetchit1 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x74,0x02,0x40] +prefetchit1 64(%rdx,%rax) + +// CHECK: prefetchit1 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x32] +prefetchit1 (%rdx)