diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -3335,6 +3335,8 @@ .. option:: -mtsxldtrk, -mno-tsxldtrk +.. option:: -muintr, -mno-uintr + .. option:: -mvaes, -mno-vaes .. option:: -mvpclmulqdq, -mno-vpclmulqdq diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -200,7 +200,9 @@ implies -mtune=. -mtune=generic is the default with no -march or -mtune specified. -- Support for feature ``HRESET`` has been added. +- Support for ``HRESET`` instructions has been added. + +- Support for ``UINTR`` instructions has been added. Internal API Changes -------------------- diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -94,6 +94,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri") +// UINTR +TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr") +TARGET_BUILTIN(__builtin_ia32_stui, "v", "n", "uintr") +TARGET_BUILTIN(__builtin_ia32_testui, "Uc", "n", "uintr") +TARGET_BUILTIN(__builtin_ia32_senduipi, "vUWi", "n", "uintr") + // AMX TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile") TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3321,6 +3321,8 @@ def mno_tbm : Flag<["-"], "mno-tbm">, Group; def mtsxldtrk : Flag<["-"], "mtsxldtrk">, Group; def mno_tsxldtrk : Flag<["-"], "mno-tsxldtrk">, Group; +def muintr : Flag<["-"], "muintr">, Group; +def mno_uintr : 
Flag<["-"], "mno-uintr">, Group; def mvaes : Flag<["-"], "mvaes">, Group; def mno_vaes : Flag<["-"], "mno-vaes">, Group; def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -135,6 +135,7 @@ bool HasAMXBF16 = false; bool HasSERIALIZE = false; bool HasTSXLDTRK = false; + bool HasUINTR = false; protected: llvm::X86::CPUKind CPU = llvm::X86::CK_None; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -310,6 +310,8 @@ HasSERIALIZE = true; } else if (Feature == "+tsxldtrk") { HasTSXLDTRK = true; + } else if (Feature == "+uintr") { + HasUINTR = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -726,6 +728,8 @@ Builder.defineMacro("__SERIALIZE__"); if (HasTSXLDTRK) Builder.defineMacro("__TSXLDTRK__"); + if (HasUINTR) + Builder.defineMacro("__UINTR__"); // Each case falls through to the previous one here. 
switch (SSELevel) { @@ -889,6 +893,7 @@ .Case("sse4a", true) .Case("tbm", true) .Case("tsxldtrk", true) + .Case("uintr", true) .Case("vaes", true) .Case("vpclmulqdq", true) .Case("wbnoinvd", true) @@ -980,6 +985,7 @@ .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) .Case("tsxldtrk", HasTSXLDTRK) + .Case("uintr", HasUINTR) .Case("vaes", HasVAES) .Case("vpclmulqdq", HasVPCLMULQDQ) .Case("wbnoinvd", HasWBNOINVD) diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -113,6 +113,7 @@ tgmath.h tmmintrin.h tsxldtrkintrin.h + uintrintrin.h unwind.h vadefs.h vaesintrin.h diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -186,6 +186,7 @@ /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x00000004 #define bit_AVX5124FMAPS 0x00000008 +#define bit_UINTR 0x00000020 #define bit_SERIALIZE 0x00004000 #define bit_TSXLDTRK 0x00010000 #define bit_PCONFIG 0x00040000 diff --git a/clang/lib/Headers/uintrintrin.h b/clang/lib/Headers/uintrintrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/uintrintrin.h @@ -0,0 +1,150 @@ +/*===------------------ uintrintrin.h - UINTR intrinsics -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86GPRINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __UINTRINTRIN_H +#define __UINTRINTRIN_H + +/* Define the default attributes for the functions in this file */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("uintr"))) + +#ifdef __x86_64__ + +/// Clears the user interrupt flag (UIF). 
Its effect takes place immediately: a +/// user interrupt cannot be delivered on the instruction boundary following +/// CLUI. Can be executed only if CR4.UINT = 1, the logical processor is in +/// 64-bit mode, and software is not executing inside an enclave; otherwise, +/// it causes an invalid-opcode exception. Causes a transactional abort if +/// executed inside a transactional region; the abort loads EAX as it would +/// had it been due to an execution of CLI. +/// +/// \headerfile <x86gprintrin.h> +/// +/// This intrinsic corresponds to the CLUI instruction. +/// +/// \operation +/// UIF := 0 +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_clui (void) +{ + __builtin_ia32_clui(); +} + +/// Sets the user interrupt flag (UIF). Its effect takes place immediately; a +/// user interrupt may be delivered on the instruction boundary following +/// STUI. Can be executed only if CR4.UINT = 1, the logical processor is in +/// 64-bit mode, and software is not executing inside an enclave; otherwise, +/// it causes an invalid-opcode exception. Causes a transactional abort if +/// executed inside a transactional region; the abort loads EAX as it would +/// had it been due to an execution of STI. +/// +/// \headerfile <x86gprintrin.h> +/// +/// This intrinsic corresponds to the STUI instruction. +/// +/// \operation +/// UIF := 1 +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_stui (void) +{ + __builtin_ia32_stui(); +} + +/// Get the current value of the user interrupt flag (UIF). Can be executed +/// regardless of CPL and inside a transactional region. Can be executed only +/// if CR4.UINT = 1, the logical processor is in 64-bit mode, and software is +/// not executing inside an enclave; otherwise, it causes an invalid-opcode +/// exception. +/// +/// \headerfile <x86gprintrin.h> +/// +/// This intrinsic corresponds to the TESTUI instruction. +/// +/// \returns The current value of the user interrupt flag (UIF).
+/// +/// \operation +/// CF := UIF +/// ZF := 0 +/// AF := 0 +/// OF := 0 +/// PF := 0 +/// SF := 0 +/// dst := CF +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_testui (void) +{ + return __builtin_ia32_testui(); +} + +/// Send interprocessor user interrupt. Can be executed only if +/// CR4.UINT = IA32_UINT_TT[0] = 1, the logical processor is in 64-bit mode, +/// and software is not executing inside an enclave; otherwise, it causes an +/// invalid-opcode exception. May be executed at any privilege level; all of +/// its memory accesses are performed with supervisor privilege. +/// +/// \headerfile <x86gprintrin.h> +/// +/// This intrinsic corresponds to the SENDUIPI instruction. +/// +/// \param __a +/// Index of user-interrupt target table entry in user-interrupt target +/// table. +/// +/// \operation +/// IF __a > UITTSZ +/// GP (0) +/// FI +/// tempUITTE := MEM[UITTADDR + (__a<<4)] +/// // tempUITTE must be valid, and can't have any reserved bit set +/// IF (tempUITTE.V == 0 OR tempUITTE[7:1] != 0) +/// GP (0) +/// FI +/// tempUPID := MEM[tempUITTE.UPIDADDR] // under lock +/// // tempUPID can't have any reserved bit set +/// IF (tempUPID[15:2] != 0 OR tempUPID[31:24] != 0) +/// GP (0) // release lock +/// FI +/// tempUPID.PIR[tempUITTE.UV] := 1; +/// IF (tempUPID.SN == 0 AND tempUPID.ON == 0) +/// tempUPID.ON := 1 +/// sendNotify := 1 +/// ELSE +/// sendNotify := 0 +/// FI +/// MEM[tempUITTE.UPIDADDR] := tempUPID // release lock +/// IF sendNotify == 1 +/// IF IA32_APIC_BASE[10] == 1 // local APIC is in x2APIC mode +/// // send ordinary IPI with vector tempUPID.NV to 32-bit physical APIC +/// // ID tempUPID.NDST +/// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST) +/// ELSE +/// // send ordinary IPI with vector tempUPID.NV to 8-bit physical APIC +/// // ID tempUPID.NDST[15:8] +/// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST[15:8]) +/// FI +/// FI +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_senduipi (unsigned long long __a) +{ +
__builtin_ia32_senduipi(__a); +} + +#endif /* __x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __UINTRINTRIN_H */ diff --git a/clang/lib/Headers/x86gprintrin.h b/clang/lib/Headers/x86gprintrin.h --- a/clang/lib/Headers/x86gprintrin.h +++ b/clang/lib/Headers/x86gprintrin.h @@ -15,4 +15,9 @@ #include #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__UINTR__) +#include +#endif + #endif /* __X86GPRINTRIN_H */ diff --git a/clang/test/CodeGen/X86/x86-uintr-builtins.c b/clang/test/CodeGen/X86/x86-uintr-builtins.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/X86/x86-uintr-builtins.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple x86_64-unknown-unknown -target-feature +uintr -emit-llvm -o - | FileCheck %s + +#include + +void test_clui() { +// CHECK-LABEL: @test_clui +// CHECK: call void @llvm.x86.clui() +// CHECK: ret + _clui(); +} + +void test_stui() { +// CHECK-LABEL: @test_stui +// CHECK: call void @llvm.x86.stui() +// CHECK: ret + _stui(); +} + +unsigned char test_testui() { +// CHECK-LABEL: @test_testui +// CHECK: %[[TMP0:.+]] = call i8 @llvm.x86.testui() +// CHECK: ret i8 %[[TMP0]] + return _testui(); +} + +void test_senduipi(unsigned long long a) { +// CHECK-LABEL: @test_senduipi +// CHECK: call void @llvm.x86.senduipi(i64 %{{.+}}) +// CHECK: ret + _senduipi(a); +} diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -283,3 +283,8 @@ // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-hreset %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-HRESET %s // HRESET: "-target-feature" "+hreset" // NO-HRESET: "-target-feature" "-hreset" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -muintr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=UINTR %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-uintr %s -### -o %t.o 
2>&1 | FileCheck -check-prefix=NO-UINTR %s +// UINTR: "-target-feature" "+uintr" +// NO-UINTR: "-target-feature" "-uintr" diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -1688,6 +1688,7 @@ // CHECK_SPR_M32: #define __SSE__ 1 // CHECK_SPR_M32: #define __SSSE3__ 1 // CHECK_SPR_M32: #define __TSXLDTRK__ 1 +// CHECK_SPR_M32: #define __UINTR__ 1 // CHECK_SPR_M32: #define __VAES__ 1 // CHECK_SPR_M32: #define __VPCLMULQDQ__ 1 // CHECK_SPR_M32: #define __WAITPKG__ 1 @@ -1757,6 +1758,7 @@ // CHECK_SPR_M64: #define __SSE__ 1 // CHECK_SPR_M64: #define __SSSE3__ 1 // CHECK_SPR_M64: #define __TSXLDTRK__ 1 +// CHECK_SPR_M64: #define __UINTR__ 1 // CHECK_SPR_M64: #define __VAES__ 1 // CHECK_SPR_M64: #define __VPCLMULQDQ__ 1 // CHECK_SPR_M64: #define __WAITPKG__ 1 diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -536,3 +536,11 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -mno-hreset -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOHRESET %s // NOHRESET-NOT: #define __HRESET__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -muintr -x c -E -dM -o - %s | FileCheck -check-prefix=UINTR %s + +// UINTR: #define __UINTR__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-uintr -x c -E -dM -o - %s | FileCheck -check-prefix=NOUINTR %s + +// NOUINTR-NOT: #define __UINTR__ 1 diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -118,7 +118,8 @@ the "target-cpu" attribute or TargetMachine CPU which will be used to select Instruction Set. If the attribute is not present, the tune CPU will follow the target CPU. 
-* Support for feature ``HRESET`` has been added. +* Support for ``HRESET`` instructions has been added. +* Support for ``UINTR`` instructions has been added. Changes to the AMDGPU Target ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5043,3 +5043,17 @@ [ImmArg>, ImmArg>, ImmArg>]>; } + +//===----------------------------------------------------------------------===// +// UINTR - User Level Interrupt + +let TargetPrefix = "x86" in { + def int_x86_clui : GCCBuiltin<"__builtin_ia32_clui">, + Intrinsic<[], [], []>; + def int_x86_stui : GCCBuiltin<"__builtin_ia32_stui">, + Intrinsic<[], [], []>; + def int_x86_testui : GCCBuiltin<"__builtin_ia32_testui">, + Intrinsic<[llvm_i8_ty], [], []>; + def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">, + Intrinsic<[], [llvm_i64_ty], []>; +} diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -178,6 +178,7 @@ X86_FEATURE (SHSTK, "shstk") X86_FEATURE (TBM, "tbm") X86_FEATURE (TSXLDTRK, "tsxldtrk") +X86_FEATURE (UINTR, "uintr") X86_FEATURE (VAES, "vaes") X86_FEATURE (VZEROUPPER, "vzeroupper") X86_FEATURE (WAITPKG, "waitpkg") diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1475,6 +1475,7 @@ Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); + Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1); Features["avx512vp2intersect"] = HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp --- a/llvm/lib/Support/X86TargetParser.cpp +++ 
b/llvm/lib/Support/X86TargetParser.cpp @@ -204,7 +204,8 @@ FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 | FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE | FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE | - FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG; + FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR | + FeatureWAITPKG; // Intel Atom processors. // Bonnell has feature parity with Core2 and adds MOVBE. @@ -481,6 +482,7 @@ constexpr FeatureBitset ImpliedFeaturesSHSTK = {}; constexpr FeatureBitset ImpliedFeaturesTBM = {}; constexpr FeatureBitset ImpliedFeaturesTSXLDTRK = {}; +constexpr FeatureBitset ImpliedFeaturesUINTR = {}; constexpr FeatureBitset ImpliedFeaturesWAITPKG = {}; constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {}; constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {}; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -291,6 +291,8 @@ "Has serialize instruction">; def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true", "Support TSXLDTRK instructions">; +def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true", + "Has UINTR Instructions">; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands // should be avoided in favor of a MOV + register CALL/PUSH/POP. 
@@ -772,7 +774,8 @@ FeatureSHSTK, FeatureVP2INTERSECT, FeatureMOVDIRI, - FeatureMOVDIR64B]; + FeatureMOVDIR64B, + FeatureUINTR]; list SPRTuning = ICXTuning; list SPRFeatures = !listconcat(ICXFeatures, SPRAdditionalFeatures); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -711,6 +711,9 @@ // For avx512-vp2intersect VP2INTERSECT, + // User level interrupts - testui + TESTUI, + /// X86 strict FP compare instructions. STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCMPS, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26088,6 +26088,15 @@ Operation.getValue(7), Operation.getValue(8), Operation.getValue(9)}); } + case Intrinsic::x86_testui: { + SDLoc dl(Op); + SDValue Chain = Op.getOperand(0); + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain); + SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, + Operation.getValue(1)); + } } return SDValue(); } @@ -31105,6 +31114,7 @@ NODE_NAME_CASE(AESDECWIDE128KL) NODE_NAME_CASE(AESENCWIDE256KL) NODE_NAME_CASE(AESDECWIDE256KL) + NODE_NAME_CASE(TESTUI) } return nullptr; #undef NODE_NAME_CASE diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -316,6 +316,9 @@ [SDNPHasChain, SDNPSideEffect]>; def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD, [SDNPHasChain, SDNPSideEffect]>; +def X86testui : SDNode<"X86ISD::TESTUI", + SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPSideEffect]>; def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL, 
[SDNPHasChain, SDNPMayLoad, SDNPSideEffect, @@ -978,6 +981,7 @@ def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">; def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">; def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">; +def HasUINTR : Predicate<"Subtarget->hasUINTR()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, @@ -2938,6 +2942,25 @@ [(int_x86_xresldtrk)]>, XD; } +//===----------------------------------------------------------------------===// +// UINTR Instructions +// +let Predicates = [HasUINTR, In64BitMode] in { + def UIRET : I<0x01, MRM_EC, (outs), (ins), "uiret", + []>, XS; + def CLUI : I<0x01, MRM_EE, (outs), (ins), "clui", + [(int_x86_clui)]>, XS; + def STUI : I<0x01, MRM_EF, (outs), (ins), "stui", + [(int_x86_stui)]>, XS; + + def SENDUIPI : I<0xC7, MRM6r, (outs), (ins GR64:$arg), "senduipi\t$arg", + [(int_x86_senduipi GR64:$arg)]>, XS; + + let Defs = [EFLAGS] in + def TESTUI : I<0x01, MRM_ED, (outs), (ins), "testui", + [(set EFLAGS, (X86testui))]>, XS; +} + //===----------------------------------------------------------------------===// // Pattern fragments to auto generate TBM instructions. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -415,6 +415,9 @@ bool HasAMXBF16 = false; bool HasAMXINT8 = false; + /// Processor supports User Level Interrupt instructions + bool HasUINTR = false; + /// Processor has a single uop BEXTR implementation. 
bool HasFastBEXTR = false; @@ -742,6 +745,7 @@ bool hasHRESET() const { return HasHRESET; } bool hasSERIALIZE() const { return HasSERIALIZE; } bool hasTSXLDTRK() const { return HasTSXLDTRK; } + bool hasUINTR() const { return HasUINTR; } bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; } bool useRetpolineIndirectBranches() const { return UseRetpolineIndirectBranches; diff --git a/llvm/test/CodeGen/X86/uintr-intrinsics.ll b/llvm/test/CodeGen/X86/uintr-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/uintr-intrinsics.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+uintr | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=+uintr | FileCheck %s --check-prefix=X32 + +define i8 @test_uintr(i64 %arg) { +; X64-LABEL: test_uintr: +; X64: # %bb.0: # %entry +; X64-NEXT: clui +; X64-NEXT: stui +; X64-NEXT: senduipi %rdi +; X64-NEXT: testui +; X64-NEXT: setb %al +; X64-NEXT: retq + +; X32-LABEL: test_uintr: +; X32: # %bb.0: # %entry +; X32-NEXT: clui +; X32-NEXT: stui +; X32-NEXT: senduipi %rdi +; X32-NEXT: testui +; X32-NEXT: setb %al +; X32-NEXT: retq +entry: + call void @llvm.x86.clui() + call void @llvm.x86.stui() + call void @llvm.x86.senduipi(i64 %arg) + %0 = call i8 @llvm.x86.testui() + ret i8 %0 +} + +declare void @llvm.x86.clui() +declare void @llvm.x86.stui() +declare i8 @llvm.x86.testui() +declare void @llvm.x86.senduipi(i64 %arg) diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt --- a/llvm/test/MC/Disassembler/X86/x86-64.txt +++ b/llvm/test/MC/Disassembler/X86/x86-64.txt @@ -715,3 +715,27 @@ # CHECK: hreset $1 0xf3 0x0f 0x3a 0xf0 0xc0 0x01 + +# CHECK: uiret +0xf3,0x0f,0x01,0xec + +# CHECK: clui +0xf3,0x0f,0x01,0xee + +# CHECK: stui +0xf3,0x0f,0x01,0xef + +# CHECK: testui +0xf3,0x0f,0x01,0xed + +# CHECK: senduipi %rax 
+0xf3,0x0f,0xc7,0xf0 + +# CHECK: senduipi %rdx +0xf3,0x0f,0xc7,0xf2 + +# CHECK: senduipi %r8 +0xf3,0x41,0x0f,0xc7,0xf0 + +# CHECK: senduipi %r13 +0xf3,0x41,0x0f,0xc7,0xf5 diff --git a/llvm/test/MC/X86/x86-64.s b/llvm/test/MC/X86/x86-64.s --- a/llvm/test/MC/X86/x86-64.s +++ b/llvm/test/MC/X86/x86-64.s @@ -2018,3 +2018,35 @@ // CHECK: hreset // CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01] hreset $1 + +// CHECK: uiret +// CHECK: encoding: [0xf3,0x0f,0x01,0xec] +uiret + +// CHECK: clui +// CHECK: encoding: [0xf3,0x0f,0x01,0xee] +clui + +// CHECK: stui +// CHECK: encoding: [0xf3,0x0f,0x01,0xef] +stui + +// CHECK: testui +// CHECK: encoding: [0xf3,0x0f,0x01,0xed] +testui + +// CHECK: senduipi %rax +// CHECK: encoding: [0xf3,0x0f,0xc7,0xf0] +senduipi %rax + +// CHECK: senduipi %rdx +// CHECK: encoding: [0xf3,0x0f,0xc7,0xf2] +senduipi %rdx + +// CHECK: senduipi %r8 +// CHECK: encoding: [0xf3,0x41,0x0f,0xc7,0xf0] +senduipi %r8 + +// CHECK: senduipi %r13 +// CHECK: encoding: [0xf3,0x41,0x0f,0xc7,0xf5] +senduipi %r13