diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -213,6 +213,16 @@ could occur if the sampling is too frequent. A prime number should be used to avoid consistently skipping certain blocks. +.. option:: -x86-disable-upper-sse-registers + + Disable the use of the upper xmm registers (xmm8-xmm15). Using these registers + forces a longer instruction encoding, which may put greater pressure on the + frontend fetch and decode stages, potentially reducing the rate at which + instructions are dispatched to the backend, particularly on older hardware. + Comparing baseline results against results obtained with this option enabled + can help determine the effects of the frontend and can be used to improve + latency and throughput estimates. + .. option:: -repetition-mode=[duplicate|loop|min] Specify the repetition mode. `duplicate` will create a large, straight line diff --git a/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/inverse_throughput-disable-upper-sse-registers.s b/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/inverse_throughput-disable-upper-sse-registers.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/inverse_throughput-disable-upper-sse-registers.s @@ -0,0 +1,10 @@ +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=inverse_throughput --skip-measurements -x86-disable-upper-sse-registers -opcode-name=ADDPSrr -repetition-mode=loop | FileCheck %s + +CHECK: --- +CHECK-NEXT: mode: inverse_throughput +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: - 'ADDPSrr [[LHS0:XMM[0-7]]] [[LHS0]] [[RHS0:XMM[0-7]]]' +CHECK-NEXT: - 'ADDPSrr [[LHS1:XMM[0-7]]] [[LHS1]] [[RHS1:XMM[0-7]]]' +CHECK-NEXT: - 'ADDPSrr [[LHS2:XMM[0-7]]] [[LHS2]] [[RHS2:XMM[0-7]]]' +CHECK-NEXT: - 'ADDPSrr [[LHS3:XMM[0-7]]] [[LHS3]] [[RHS3:XMM[0-7]]]' diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp 
b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -54,6 +54,11 @@
     cl::desc("The sample period (nbranches/sample), used for LBR sampling"),
     cl::cat(BenchmarkOptions), cl::init(0));
 
+static cl::opt<bool>
+    DisableUpperSSERegisters("x86-disable-upper-sse-registers",
+                             cl::desc("Disable XMM8-XMM15 register usage"),
+                             cl::cat(BenchmarkOptions), cl::init(false));
+
 // FIXME: Validates that repetition-mode is loop if LBR is requested.
 
 // Returns a non-null reason if we cannot handle the memory references in this
@@ -708,6 +713,12 @@
                                const APInt &Value) const override;
 
   ArrayRef<unsigned> getUnavailableRegisters() const override {
+    // With -x86-disable-upper-sse-registers, XMM8-XMM15 are made unavailable
+    // in addition to the default set.
+    if (DisableUpperSSERegisters)
+      return makeArrayRef(kUnavailableRegistersSSE,
+                          std::size(kUnavailableRegistersSSE));
+
     return makeArrayRef(kUnavailableRegisters,
                         std::size(kUnavailableRegisters));
   }
@@ -772,6 +783,7 @@
   }
 
   static const unsigned kUnavailableRegisters[4];
+  static const unsigned kUnavailableRegistersSSE[12];
 };
 
 // We disable a few registers that cannot be encoded on instructions with a REX
@@ -779,6 +791,14 @@
 const unsigned ExegesisX86Target::kUnavailableRegisters[4] = {X86::AH, X86::BH,
                                                               X86::CH, X86::DH};
 
+// Superset of kUnavailableRegisters used when -x86-disable-upper-sse-registers
+// is set: additionally disables the upper (x86_64) SSE registers to reduce
+// frontend decoder load. Keep the first four entries in sync with
+// kUnavailableRegisters.
+const unsigned ExegesisX86Target::kUnavailableRegistersSSE[12] = {
+    X86::AH,    X86::BH,    X86::CH,    X86::DH,    X86::XMM8,  X86::XMM9,
+    X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13, X86::XMM14, X86::XMM15};
+
 // We're using one of R8-R15 because these registers are never hardcoded in
 // instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have less
 // conflicts.