Index: clang/include/clang/Basic/BuiltinsX86.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86.def +++ clang/include/clang/Basic/BuiltinsX86.def @@ -825,6 +825,7 @@ BUILTIN(__builtin_ia32_rdtscp, "UOiUi*", "") TARGET_BUILTIN(__builtin_ia32_rdpid, "Ui", "n", "rdpid") +TARGET_BUILTIN(__builtin_ia32_rdpru, "ULLii", "n", "rdpru") // PKU TARGET_BUILTIN(__builtin_ia32_rdpkru, "Ui", "n", "pku") Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -4570,6 +4570,8 @@ def mno_ptwrite : Flag<["-"], "mno-ptwrite">, Group<m_x86_Features_Group>; def mrdpid : Flag<["-"], "mrdpid">, Group<m_x86_Features_Group>; def mno_rdpid : Flag<["-"], "mno-rdpid">, Group<m_x86_Features_Group>; +def mrdpru : Flag<["-"], "mrdpru">, Group<m_x86_Features_Group>; +def mno_rdpru : Flag<["-"], "mno-rdpru">, Group<m_x86_Features_Group>; def mrdrnd : Flag<["-"], "mrdrnd">, Group<m_x86_Features_Group>; def mno_rdrnd : Flag<["-"], "mno-rdrnd">, Group<m_x86_Features_Group>; def mrtm : Flag<["-"], "mrtm">, Group<m_x86_Features_Group>; Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -125,6 +125,7 @@ bool HasMOVBE = false; bool HasPREFETCHWT1 = false; bool HasRDPID = false; + bool HasRDPRU = false; bool HasRetpolineExternalThunk = false; bool HasLAHFSAHF = false; bool HasWBNOINVD = false; Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -298,6 +298,8 @@ HasCLDEMOTE = true; } else if (Feature == "+rdpid") { HasRDPID = true; + } else if (Feature == "+rdpru") { + HasRDPRU = true; } else if (Feature == "+kl") { HasKL = true; } else if (Feature == "+widekl") { @@ -742,6 +744,8 @@ Builder.defineMacro("__WIDEKL__"); if (HasRDPID) Builder.defineMacro("__RDPID__"); + if (HasRDPRU) + Builder.defineMacro("__RDPRU__"); if
(HasCLDEMOTE) Builder.defineMacro("__CLDEMOTE__"); if (HasWAITPKG) @@ -925,6 +929,7 @@ .Case("prfchw", true) .Case("ptwrite", true) .Case("rdpid", true) + .Case("rdpru", true) .Case("rdrnd", true) .Case("rdseed", true) .Case("rtm", true) @@ -1020,6 +1025,7 @@ .Case("prfchw", HasPRFCHW) .Case("ptwrite", HasPTWRITE) .Case("rdpid", HasRDPID) + .Case("rdpru", HasRDPRU) .Case("rdrnd", HasRDRND) .Case("rdseed", HasRDSEED) .Case("retpoline-external-thunk", HasRetpolineExternalThunk) Index: clang/lib/Headers/CMakeLists.txt =================================================================== --- clang/lib/Headers/CMakeLists.txt +++ clang/lib/Headers/CMakeLists.txt @@ -169,6 +169,7 @@ popcntintrin.h prfchwintrin.h ptwriteintrin.h + rdpruintrin.h rdseedintrin.h rtmintrin.h serializeintrin.h Index: clang/lib/Headers/rdpruintrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/rdpruintrin.h @@ -0,0 +1,57 @@ +/*===---- rdpruintrin.h - RDPRU intrinsics ---------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H +#error "Never use <rdpruintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __RDPRUINTRIN_H +#define __RDPRUINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("rdpru"))) + + +/// Reads the content of a processor register. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic corresponds to the RDPRU instruction. +/// +/// \param reg_id +/// A processor register identifier.
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS +__rdpru (int reg_id) +{ + return __builtin_ia32_rdpru(reg_id); +} + +#define __RDPRU_MPERF 0 +#define __RDPRU_APERF 1 + +/// Reads the content of processor register MPERF. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic generates instruction RDPRU to read the value of +/// register MPERF. +#define __mperf() __builtin_ia32_rdpru(__RDPRU_MPERF) + +/// Reads the content of processor register APERF. +/// +/// \headerfile <x86intrin.h> +/// +/// This intrinsic generates instruction RDPRU to read the value of +/// register APERF. +#define __aperf() __builtin_ia32_rdpru(__RDPRU_APERF) + +#undef __DEFAULT_FN_ATTRS + +#endif /* __RDPRUINTRIN_H */ Index: clang/lib/Headers/x86intrin.h =================================================================== --- clang/lib/Headers/x86intrin.h +++ clang/lib/Headers/x86intrin.h @@ -59,5 +59,9 @@ #include <clzerointrin.h> #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__RDPRU__) +#include <rdpruintrin.h> +#endif #endif /* __X86INTRIN_H */ Index: clang/test/CodeGen/rdpru-builtins.c =================================================================== --- /dev/null +++ clang/test/CodeGen/rdpru-builtins.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=i686-- -target-feature +rdpru -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +rdpru -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-cpu znver2 -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <x86intrin.h> + +// NOTE: This should correspond to the tests in llvm/test/CodeGen/X86/rdpru.ll + +unsigned long long test_rdpru(int regid) { + // CHECK-LABEL: test_rdpru + // CHECK: [[RESULT:%.*]] = call i64 @llvm.x86.rdpru(i32 %{{.*}}) + // CHECK-NEXT: ret i64 [[RESULT]] + return __rdpru(regid); +} + +unsigned long long test_mperf() { + // CHECK-LABEL: test_mperf + // CHECK: [[RESULT:%.*]] = call i64
@llvm.x86.rdpru(i32 0) + // CHECK-NEXT: ret i64 [[RESULT]] + return __mperf(); +} + +unsigned long long test_aperf() { + // CHECK-LABEL: test_aperf + // CHECK: [[RESULT:%.*]] = call i64 @llvm.x86.rdpru(i32 1) + // CHECK-NEXT: ret i64 [[RESULT]] + return __aperf(); +} + +void test_direct_calls_to_builtin_rdpru(int regid) { + // CHECK: call i64 @llvm.x86.rdpru(i32 0) + // CHECK: call i64 @llvm.x86.rdpru(i32 1) + // CHECK: call i64 @llvm.x86.rdpru(i32 %{{.*}}) + (void) __builtin_ia32_rdpru(0); + (void) __builtin_ia32_rdpru(1); + (void) __builtin_ia32_rdpru(regid); +} Index: clang/test/Driver/x86-target-features.c =================================================================== --- clang/test/Driver/x86-target-features.c +++ clang/test/Driver/x86-target-features.c @@ -136,6 +136,11 @@ // RDPID: "-target-feature" "+rdpid" // NO-RDPID: "-target-feature" "-rdpid" +// RUN: %clang --target=i386 -march=i386 -mrdpru %s -### 2>&1 | FileCheck -check-prefix=RDPRU %s +// RUN: %clang --target=i386 -march=i386 -mno-rdpru %s -### 2>&1 | FileCheck -check-prefix=NO-RDPRU %s +// RDPRU: "-target-feature" "+rdpru" +// NO-RDPRU: "-target-feature" "-rdpru" + // RUN: %clang -target i386-linux-gnu -mretpoline %s -### 2>&1 | FileCheck -check-prefix=RETPOLINE %s // RUN: %clang -target i386-linux-gnu -mno-retpoline %s -### 2>&1 | FileCheck -check-prefix=NO-RETPOLINE %s // RETPOLINE: "-target-feature" "+retpoline-indirect-calls" "-target-feature" "+retpoline-indirect-branches" Index: clang/test/Preprocessor/x86_target_features.c =================================================================== --- clang/test/Preprocessor/x86_target_features.c +++ clang/test/Preprocessor/x86_target_features.c @@ -588,3 +588,11 @@ // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 -x c -E -dM -o - %s | FileCheck -check-prefix=NOCRC32 %s // NOCRC32-NOT: #define __CRC32__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mrdpru -x c -E -dM -o - %s | FileCheck -check-prefix=RDPRU 
%s + +// RDPRU: #define __RDPRU__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-rdpru -x c -E -dM -o - %s | FileCheck -check-prefix=NORDPRU %s + +// NORDPRU-NOT: #define __RDPRU__ 1 Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -155,6 +155,8 @@ -------------------------- * Support ``half`` type on SSE2 and above targets. +* Support ``rdpru`` instruction on Zen2 and above targets. The ``-m[no-]rdpru`` + option to Clang controls this independently of the target processor type. Changes to the OCaml bindings ----------------------------- Index: llvm/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsX86.td +++ llvm/include/llvm/IR/IntrinsicsX86.td @@ -72,6 +72,12 @@ [ImmArg>]>; } +// Read Processor Register. +let TargetPrefix = "x86" in { + def int_x86_rdpru : ClangBuiltin<"__builtin_ia32_rdpru">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; +} + //===----------------------------------------------------------------------===// // CET SS let TargetPrefix = "x86" in { Index: llvm/include/llvm/Support/X86TargetParser.def =================================================================== --- llvm/include/llvm/Support/X86TargetParser.def +++ llvm/include/llvm/Support/X86TargetParser.def @@ -178,6 +178,7 @@ X86_FEATURE (PRFCHW, "prfchw") X86_FEATURE (PTWRITE, "ptwrite") X86_FEATURE (RDPID, "rdpid") +X86_FEATURE (RDPRU, "rdpru") X86_FEATURE (RDRND, "rdrnd") X86_FEATURE (RDSEED, "rdseed") X86_FEATURE (RTM, "rtm") Index: llvm/lib/Support/X86TargetParser.cpp =================================================================== --- llvm/lib/Support/X86TargetParser.cpp +++ llvm/lib/Support/X86TargetParser.cpp @@ -285,8 +285,9 @@ FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | 
FeatureXSAVEC | FeatureXSAVEOPT | FeatureXSAVES; -constexpr FeatureBitset FeaturesZNVER2 = - FeaturesZNVER1 | FeatureCLWB | FeatureRDPID | FeatureWBNOINVD; +constexpr FeatureBitset FeaturesZNVER2 = FeaturesZNVER1 | FeatureCLWB | + FeatureRDPID | FeatureRDPRU | + FeatureWBNOINVD; static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 | FeatureINVPCID | FeaturePKU | FeatureVAES | FeatureVPCLMULQDQ; @@ -490,6 +491,7 @@ constexpr FeatureBitset ImpliedFeaturesPRFCHW = {}; constexpr FeatureBitset ImpliedFeaturesPTWRITE = {}; constexpr FeatureBitset ImpliedFeaturesRDPID = {}; +constexpr FeatureBitset ImpliedFeaturesRDPRU = {}; constexpr FeatureBitset ImpliedFeaturesRDRND = {}; constexpr FeatureBitset ImpliedFeaturesRDSEED = {}; constexpr FeatureBitset ImpliedFeaturesRTM = {}; Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -266,6 +266,8 @@ "Write Back No Invalidate">; def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true", "Support RDPID instructions">; +def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true", + "Support RDPRU instructions">; def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true", "Wait and pause enhancements">; def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true", @@ -1238,6 +1240,7 @@ TuningInsertVZEROUPPER]; list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB, FeatureRDPID, + FeatureRDPRU, FeatureWBNOINVD]; list<SubtargetFeature> ZN2Tuning = ZNTuning; list<SubtargetFeature> ZN2Features = Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27880,11 +27880,14 @@ } // Read Performance Monitoring Counters. case RDPMC: + // Read Processor Register. + case RDPRU: // GetExtended Control Register.
case XGETBV: { SmallVector<SDValue, 2> Results; // RDPMC uses ECX to select the index of the performance counter to read. + // RDPRU uses ECX to select the processor register to read. // XGETBV uses ECX to select the index of the XCR register to return. // The result is stored into registers EDX:EAX. expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX, @@ -32887,6 +32890,10 @@ expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget, Results); return; + case Intrinsic::x86_rdpru: + expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget, + Results); + return; case Intrinsic::x86_xgetbv: expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget, Results); Index: llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -978,6 +978,7 @@ def HasCLWB : Predicate<"Subtarget->hasCLWB()">; def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">; def HasRDPID : Predicate<"Subtarget->hasRDPID()">; +def HasRDPRU : Predicate<"Subtarget->hasRDPRU()">; def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">; def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">; def HasCX8 : Predicate<"Subtarget->hasCX8()">; Index: llvm/lib/Target/X86/X86InstrSystem.td =================================================================== --- llvm/lib/Target/X86/X86InstrSystem.td +++ llvm/lib/Target/X86/X86InstrSystem.td @@ -734,6 +734,15 @@ Requires<[In64BitMode, HasPTWRITE]>; } // SchedRW +//===----------------------------------------------------------------------===// +// RDPRU - Read Processor Register instruction.
+ +let SchedRW = [WriteSystem] in { +let Uses = [ECX], Defs = [EAX, EDX] in + def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, PS, + Requires<[HasRDPRU]>; +} + //===----------------------------------------------------------------------===// // Platform Configuration instruction Index: llvm/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -37,7 +37,7 @@ TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, FIXUPIMM, FIXUPIMM_MASKZ, GATHER_AVX2, - ROUNDP, ROUNDS + ROUNDP, ROUNDS, RDPRU }; struct IntrinsicData { @@ -309,6 +309,7 @@ X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0), + X86_INTRINSIC_DATA(rdpru, RDPRU, X86::RDPRU, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0), Index: llvm/test/CodeGen/X86/rdpru.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/rdpru.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+rdpru | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686-- -mattr=+rdpru -fast-isel | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+rdpru | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+rdpru -fast-isel | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64 + +define void @rdpru_asm() { +; X86-LABEL: rdpru_asm: +; X86: # %bb.0: 
# %entry +; X86-NEXT: #APP +; X86-NEXT: rdpru +; X86-NEXT: #NO_APP +; X86-NEXT: retl +; +; X64-LABEL: rdpru_asm: +; X64: # %bb.0: # %entry +; X64-NEXT: #APP +; X64-NEXT: rdpru +; X64-NEXT: #NO_APP +; X64-NEXT: retq +entry: + call void asm sideeffect "rdpru", "~{dirflag},~{fpsr},~{flags}"() + ret void +} + +define i64 @rdpru_param(i32 %regid) local_unnamed_addr { +; X86-LABEL: rdpru_param: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: rdpru +; X86-NEXT: retl +; +; X64-LABEL: rdpru_param: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: rdpru +; X64-NEXT: shlq $32, %rdx +; X64-NEXT: orq %rdx, %rax +; X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.x86.rdpru(i32 %regid) + ret i64 %0 +} + +define i64 @rdpru_mperf() local_unnamed_addr { +; X86-LABEL: rdpru_mperf: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: rdpru +; X86-NEXT: retl +; +; X64-LABEL: rdpru_mperf: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: rdpru +; X64-NEXT: shlq $32, %rdx +; X64-NEXT: orq %rdx, %rax +; X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.x86.rdpru(i32 0) + ret i64 %0 +} + +define i64 @rdpru_aperf() local_unnamed_addr { +; X86-LABEL: rdpru_aperf: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $1, %ecx +; X86-NEXT: rdpru +; X86-NEXT: retl +; +; X64-LABEL: rdpru_aperf: +; X64: # %bb.0: # %entry +; X64-NEXT: movl $1, %ecx +; X64-NEXT: rdpru +; X64-NEXT: shlq $32, %rdx +; X64-NEXT: orq %rdx, %rax +; X64-NEXT: retq +entry: + %0 = tail call i64 @llvm.x86.rdpru(i32 1) + ret i64 %0 +} + +declare i64 @llvm.x86.rdpru(i32) Index: llvm/test/MC/X86/RDPRU.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/RDPRU.s @@ -0,0 +1,17 @@ +/// Encoding and disassembly of rdpru. 
+ +// RUN: llvm-mc -triple i686-- --show-encoding %s |\ +// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING + +// RUN: llvm-mc -triple i686-- -filetype=obj %s |\ +// RUN: llvm-objdump -d - | FileCheck %s + +// RUN: llvm-mc -triple x86_64-- --show-encoding %s |\ +// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING + +// RUN: llvm-mc -triple x86_64-- -filetype=obj %s |\ +// RUN: llvm-objdump -d - | FileCheck %s + +// CHECK: rdpru +// ENCODING: encoding: [0x0f,0x01,0xfd] +rdpru