Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def =================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -668,6 +668,12 @@ // TBM TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "", "tbm") +// LWP +TARGET_BUILTIN(__builtin_ia32_llwpcb, "vv*", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_slwpcb, "v*", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpins32, "UcUiUiUi", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiUi", "", "lwp") + // SHA TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "", "sha") TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "", "sha") Index: cfe/trunk/include/clang/Basic/BuiltinsX86_64.def =================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86_64.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86_64.def @@ -69,6 +69,8 @@ TARGET_BUILTIN(__builtin_ia32_pdep_di, "ULLiULLiULLi", "", "bmi2") TARGET_BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "", "bmi2") TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm") +TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp") TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl") Index: cfe/trunk/include/clang/Driver/Options.td =================================================================== --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -1752,6 +1752,7 @@ def mno_bmi2 : Flag<["-"], "mno-bmi2">, Group; def mno_popcnt : Flag<["-"], "mno-popcnt">, Group; def mno_tbm : Flag<["-"], "mno-tbm">, Group; +def mno_lwp : Flag<["-"], "mno-lwp">, Group; def mno_fma4 : Flag<["-"], "mno-fma4">, Group; def mno_fma : Flag<["-"], "mno-fma">, Group; def mno_xop : Flag<["-"], "mno-xop">, Group; @@ -1951,6 +1952,7 @@ def mbmi2 : Flag<["-"], "mbmi2">, Group; def mpopcnt : Flag<["-"], "mpopcnt">, Group; def mtbm : Flag<["-"], "mtbm">, Group; +def mlwp : Flag<["-"], "mlwp">, Group; def mfma4 : Flag<["-"], "mfma4">, Group; def mfma : Flag<["-"], "mfma">, Group; def mxop : Flag<["-"], "mxop">, Group; Index: cfe/trunk/lib/Basic/Targets.cpp =================================================================== --- cfe/trunk/lib/Basic/Targets.cpp +++ cfe/trunk/lib/Basic/Targets.cpp @@ -2591,6 +2591,7 @@ bool HasRDSEED = false; bool HasADX = false; bool HasTBM = false; + bool HasLWP = false; bool HasFMA = false; bool HasF16C = false; bool HasAVX512CD = false; @@ -3363,6 +3364,7 @@ case CK_BDVER1: // xop implies avx, sse4a and fma4. setFeatureEnabledImpl(Features, "xop", true); + setFeatureEnabledImpl(Features, "lwp", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); @@ -3634,6 +3636,8 @@ HasADX = true; } else if (Feature == "+tbm") { HasTBM = true; + } else if (Feature == "+lwp") { + HasLWP = true; } else if (Feature == "+fma") { HasFMA = true; } else if (Feature == "+f16c") { @@ -3949,6 +3953,9 @@ if (HasTBM) Builder.defineMacro("__TBM__"); + if (HasLWP) + Builder.defineMacro("__LWP__"); + if (HasMWAITX) Builder.defineMacro("__MWAITX__"); @@ -4132,6 +4139,7 @@ .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) + .Case("lwp", HasLWP) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) Index: cfe/trunk/lib/Headers/CMakeLists.txt =================================================================== --- cfe/trunk/lib/Headers/CMakeLists.txt +++ cfe/trunk/lib/Headers/CMakeLists.txt @@ -45,6 +45,7 @@ inttypes.h iso646.h limits.h + lwpintrin.h lzcntintrin.h mm3dnow.h mmintrin.h Index: cfe/trunk/lib/Headers/lwpintrin.h =================================================================== --- cfe/trunk/lib/Headers/lwpintrin.h +++ cfe/trunk/lib/Headers/lwpintrin.h @@ -0,0 +1,150 @@ +/*===---- lwpintrin.h - LWP intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __LWPINTRIN_H +#define __LWPINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp"))) + +/// \brief Parses the LWPCB at the specified address and enables +/// profiling if valid. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the LLWPCB instruction. +/// +/// \param __addr +/// Address to the new Lightweight Profiling Control Block (LWPCB). If the +/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables +/// Lightweight Profiling. +static __inline__ void __DEFAULT_FN_ATTRS +__llwpcb (void *__addr) +{ + __builtin_ia32_llwpcb(__addr); +} + +/// \brief Flushes the LWP state to memory and returns the address of the LWPCB. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the SLWPCB instruction. +/// +/// \return +/// Address to the current Lightweight Profiling Control Block (LWPCB). +/// If LWP is not currently enabled, returns NULL. +static __inline__ void* __DEFAULT_FN_ATTRS +__slwpcb () +{ + return __builtin_ia32_slwpcb(); +} + +/// \brief Inserts programmed event record into the LWP event ring buffer +/// and advances the ring buffer pointer. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the LWPINS instruction. +/// +/// \param DATA2 +/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. +/// \returns If the ring buffer is full and LWP is running in Synchronized Mode, +/// the event record overwrites the last record in the buffer, the MissedEvents +/// counter in the LWPCB is incremented, the head pointer is not advanced, and +/// 1 is returned. Otherwise 0 is returned. +#define __lwpins32(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +/// \brief Decrements the LWP programmed value sample event counter. If the result is +/// negative, inserts an event record into the LWP event ring buffer in memory +/// and advances the ring buffer pointer. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the LWPVAL instruction. +/// +/// \param DATA2 +/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. +#define __lwpval32(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +#ifdef __x86_64__ + +/// \brief Inserts programmed event record into the LWP event ring buffer +/// and advances the ring buffer pointer. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the LWPINS instruction. +/// +/// \param DATA2 +/// A 64-bit value is inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. +/// \returns If the ring buffer is full and LWP is running in Synchronized Mode, +/// the event record overwrites the last record in the buffer, the MissedEvents +/// counter in the LWPCB is incremented, the head pointer is not advanced, and +/// 1 is returned. Otherwise 0 is returned. +#define __lwpins64(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +/// \brief Decrements the LWP programmed value sample event counter. If the result is +/// negative, inserts an event record into the LWP event ring buffer in memory +/// and advances the ring buffer pointer. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the LWPVAL instruction. +/// +/// \param DATA2 +/// A 64-bit value is and inserted into the 64-bit Data2 field. +/// \param DATA1 +/// A 32-bit value is inserted into the 32-bit Data1 field. +/// \param FLAGS +/// A 32-bit immediate value is inserted into the 32-bit Flags field. +#define __lwpval64(DATA2, DATA1, FLAGS) \ + (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ + (unsigned int) (FLAGS))) + +#endif + +#undef __DEFAULT_FN_ATTRS + +#endif /* __LWPINTRIN_H */ Index: cfe/trunk/lib/Headers/x86intrin.h =================================================================== --- cfe/trunk/lib/Headers/x86intrin.h +++ cfe/trunk/lib/Headers/x86intrin.h @@ -72,6 +72,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) #include #endif Index: cfe/trunk/test/CodeGen/lwp-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/lwp-builtins.c +++ cfe/trunk/test/CodeGen/lwp-builtins.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lwp -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +void test_llwpcb(void *ptr) { + // CHECK-LABEL: @test_llwpcb + // CHECK: call void @llvm.x86.llwpcb(i8* %{{.*}}) + __llwpcb(ptr); +} + +void* test_slwpcb() { + // CHECK-LABEL: @test_slwpcb + // CHECK: call i8* @llvm.x86.slwpcb() + return __slwpcb(); +} + +unsigned char test_lwpins32(unsigned val2, unsigned val1) { + // CHECK-LABEL: @test_lwpins32 + // CHECK: call i8 @llvm.x86.lwpins32(i32 + return __lwpins32(val2, val1, 0x01234); +} + +unsigned char test_lwpins64(unsigned long long val2, unsigned val1) { + // CHECK-LABEL: @test_lwpins64 + // CHECK: call i8 @llvm.x86.lwpins64(i64 + return __lwpins64(val2, val1, 0x56789); +} + +void test_lwpval32(unsigned val2, unsigned val1) { + // CHECK-LABEL: @test_lwpval32 + // CHECK: call void @llvm.x86.lwpval32(i32 + __lwpval32(val2, val1, 0x01234); +} + +void test_lwpval64(unsigned long long val2, unsigned val1) { + // CHECK-LABEL: @test_lwpval64 + // CHECK: call void @llvm.x86.lwpval64(i64 + __lwpval64(val2, val1, 0xABCDEF); +}