diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -96,7 +96,8 @@
Changes to the X86 Target
-------------------------
- During this release ...
+* A machine model for the AMD K10 (10h, Barcelona) CPU was added. It is used to
+ support instruction scheduling and other instruction cost heuristics.
Changes to the AMDGPU Target
-----------------------------
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -487,6 +487,7 @@
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
+include "X86ScheduleBarcelona.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
@@ -1145,7 +1146,7 @@
}
foreach P = ["amdfam10", "barcelona"] in {
- def : Proc
;
+ def : ProcessorModel
;
}
// Bobcat
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -163,8 +163,18 @@
def : PfmCountersBinding<"k8-sse3", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"opteron-sse3", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon64-sse3", DefaultAMDPfmCounters>;
-def : PfmCountersBinding<"amdfam10", DefaultAMDPfmCounters>;
-def : PfmCountersBinding<"barcelona", DefaultAMDPfmCounters>;
+
+def BarcelonaPfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"BnFADD", "dispatched_fpu:ops_add + dispatched_fpu:ops_add_pipe_load_ops">,
+ PfmIssueCounter<"BnFMUL", "dispatched_fpu:ops_multiply + dispatched_fpu:ops_multiply_pipe_load_ops">,
+ PfmIssueCounter<"BnFMISC", "dispatched_fpu:ops_store + dispatched_fpu:ops_store_pipe_load_ops">,
+ ];
+}
+def : PfmCountersBinding<"amdfam10", BarcelonaPfmCounters>;
+def : PfmCountersBinding<"barcelona", BarcelonaPfmCounters>;
def BdVer2PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
diff --git a/llvm/lib/Target/X86/X86ScheduleBarcelona.td b/llvm/lib/Target/X86/X86ScheduleBarcelona.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ScheduleBarcelona.td
@@ -0,0 +1,764 @@
+//=- X86ScheduleBarcelona.td - X86 Barcelona Scheduling ------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for AMD fam10h (Barcelona) to support
+// instruction scheduling and other instruction cost heuristics.
+// Based on:
+// * Measurements from llvm-exegesis
+// * AMD Software Optimization Guide for AMD Family 10h and 12h Processors
+// https://support.amd.com/TechDocs/40546.pdf
+// * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
+// http://www.agner.org/optimize/microarchitecture.pdf
+// * https://www.realworldtech.com/barcelona/
+//
+//===----------------------------------------------------------------------===//
+
+def BarcelonaModel : SchedMachineModel {
+ let IssueWidth = 3; // Up to 3 IPC can be decoded, issued, retired.
+ let MicroOpBufferSize = 72; // 24 lines of three macro-ops.
+ let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
+ let LoadLatency = 3; // The L1 data cache has a 3-cycle load-to-use latency.
+ let HighLatency = 10; // Between the 96.6th and 96.7th percentiles of all
+ // the instruction latencies llvm-exegesis can measure.
+ let MispredictPenalty = 12; // Minimum branch misdirection penalty.
+
+ let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+
+ // FIXME: Incomplete. This flag is set to allow the scheduler to assign
+ // a default model to unrecognized opcodes.
+ let CompleteModel = 0;
+} // SchedMachineModel
+
+let SchedModel = BarcelonaModel in {
+
+
+//===----------------------------------------------------------------------===//
+// RCU
+//===----------------------------------------------------------------------===//
+
+// 24 lines of three macro-ops.
+def BnRCU : RetireControlUnit<72, 3>;
+// FIXME: it isn't that simple actually.
+// It's not 72 entries, but more like 24 "entries", each entry tracking
+// up to 3 lanes.
+
+
+//===----------------------------------------------------------------------===//
+// Functional Clusters
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Integer Cluster
+//
+
+// Integer physical register file has 40 registers of 64-bit.
+def BnIntPRF : RegisterFile<40, [GR64, CCR]>;
+
+// There are a total of three integer pipes.
+foreach i = 0-2 in {
+ def BnInt#i : ProcResource<1>;
+}
+
+// The integer scheduler is based on a three-wide queuing system (also known as
+// a reservation station) that feeds three integer execution positions or pipes.
+// The reservation stations are eight entries deep, for a total queuing system
+// of 24 integer macro-ops.
+def BnInt : ProcResGroup<[BnInt0, BnInt1, BnInt2]> {
+ let BufferSize = 24;
+}
+
+// *Each* integer pipe has an ALU unit, *and* an AGU unit.
+foreach i = 0-2 in {
+ def BnALU#i : ProcResource<1>;
+ def BnAGU#i : ProcResource<1>; // FIXME: so which SchedWrites use AGU?
+}
+
+// Integer pipe 0 contains a multiplication unit.
+// FIXME: when this unit is occupied, BnALU{0,1} units are stalled?
+def BnIMUL : ProcResource<1>;
+
+// Integer pipe 2 contains a unit for ABM instructions (popcnt, lzcnt).
+// FIXME: when this unit is occupied, BnALU{1,2} units are stalled?
+def BnABM : ProcResource<1>;
+
+// Int pipe grouping.
+def BnInt012 : ProcResGroup<[BnInt0, BnInt1, BnInt2]>;
+
+// ALU unit grouping.
+def BnALU012 : ProcResGroup<[BnALU0, BnALU1, BnALU2]>;
+
+// AGU unit grouping.
+// FIXME: so which SchedWrites use AGU?
+def BnAGU012 : ProcResGroup<[BnAGU0, BnAGU1, BnAGU2]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Cluster
+//
+
+// FP physical register file has 120 registers of 128-bit.
+def BnFpuPRF : RegisterFile<120, [VR64, VR128], [1, 1]>;
+
+// There are a total of three floating-point pipes.
+foreach i = 0-2 in {
+ def BnFPU#i : ProcResource<1>;
+}
+
+// The floating-point scheduler has a dedicated 36-entry scheduler buffer,
+// organized as 12 lines of three macro-ops each.
+def BnFPU : ProcResGroup<[BnFPU0, BnFPU1, BnFPU2]> {
+ let BufferSize = 36;
+}
+
+// FPU pipe grouping.
+def BnFPU01 : ProcResGroup<[BnFPU0, BnFPU1]>;
+def BnFPU12 : ProcResGroup<[BnFPU1, BnFPU2]>;
+def BnFPU012 : ProcResGroup<[BnFPU0, BnFPU1, BnFPU2]>;
+
+// FP pipe 0 contains vector (both integer and fp) addition unit.
+def BnFADD : ProcResource<1>;
+
+// FP pipe 1 contains vector (both integer and fp) multiplication unit.
+def BnFMUL : ProcResource<1>;
+
+// FP pipe 2 contains vector conversion/load/store unit.
+def BnFMISC : ProcResource<1>;
+
+// FPU unit grouping.
+// Some instructions can go to either BnFADD or BnFMUL units.
+def BnFAddOrMul : ProcResGroup<[BnFADD, BnFMUL]>;
+// Some instructions can execute on any FPU unit.
+def BnFAny : ProcResGroup<[BnFADD, BnFMUL, BnFMISC]>;
+
+//===----------------------------------------------------------------------===//
+// Load-Store Cluster
+//
+
+// The L1 data cache can support two 128-bit loads or two 64-bit store writes
+// per cycle or a mix of those.
+def BnLSU : ProcResource<2>;
+
+// The LSU consists of two queues—LS1 and LS2.
+
+// LS1 can issue two L1 cache operations (loads or store tag checks) per cycle.
+let Super = BnLSU in
+def BnLoad : ProcResource<2> { // BnLS1
+ let BufferSize = 12;
+}
+
+def BnLoadQueue : LoadQueue;
+
+// FIXME: it's kinda more complicated than that, LS2 handles loads that
+// LS1 failed to handle via L1 cache.
+
+// Store writes are done exclusively from LS2. 128-bit stores are specially
+// handled in that they take two LS2 entries, and the store writes are
+// performed as two 64-bit writes.
+let Super = BnLSU in
+def BnStore : ProcResource<2> { // BnLS2
+ let BufferSize = 32;
+}
+
+def BnStoreQueue : StoreQueue;
+
+
+
+//===----------------------------------------------------------------------===//
+// Basic helper classes.
+//===----------------------------------------------------------------------===//
+
+multiclass BnWriteRes ExePorts,
+ int Lat, list Res, int UOps> {
+ def : WriteRes {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+}
+
+multiclass BnWriteResInt ExePorts, int Lat,
+ list Res, int UOps> {
+ defm : BnWriteRes;
+}
+
+multiclass BnWriteResFPU ExePorts, int Lat,
+ list Res, int UOps> {
+ defm : BnWriteRes;
+}
+
+multiclass __bnWriteResPairInt ExePorts, int Lat,
+ list Res, int UOps, int LoadLat,
+ int LoadUOps> {
+ defm : BnWriteResInt;
+
+ defm : BnWriteResInt;
+}
+
+multiclass __bnWriteResPairFPU ExePorts, int Lat,
+ list Res, int UOps, int LoadLat,
+ int LoadUOps> {
+ defm : BnWriteResFPU;
+
+ defm : BnWriteResFPU;
+}
+
+multiclass BnWriteResIntPair ExePorts, int Lat,
+ list Res, int UOps, int LoadUOps = 0> {
+ defm : __bnWriteResPairInt;
+}
+
+multiclass BnWriteResFPUPair ExePorts, int Lat,
+ list Res, int UOps, int LoadUOps = 0> {
+ defm : __bnWriteResPairFPU;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Here be dragons.
+//===----------------------------------------------------------------------===//
+
+// L1 data cache has a 3-cycle load-to-use latency, so ReadAfterLd registers
+// needn't be available until 3 cycles after the memory operand.
+def : ReadAdvance;
+
+// Vector loads are 2 cycles (yes, less than scalar loads), so ReadAfterVec*Ld
+// registers needn't be available until 2 cycles after the memory operand.
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance; // unsupported.
+
+def : ReadAdvance; // Non-applicable?
+
+// A folded store needs a cycle on the BnStore for the store data.
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResInt;
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner. FIXME: split
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResInt;
+
+// Load/store MXCSR.
+defm : BnWriteResInt; // FIXME: latency/uops/rthr not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteRes; // FIXME
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResIntPair;
+
+////////////////////////////////////////////////////////////////////////////////
+// Special case scheduling classes.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResInt; // FIXME: could split
+defm : BnWriteResInt;
+defm : BnWriteResInt;
+
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+defm : BnWriteResInt; // FIXME
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+
+defm : BnWriteResInt;
+defm : BnWriteResInt;
+
+defm : BnWriteResInt; // FIXME: XCHG8rr is an outlier FIXME: split XCHG/XADD
+defm : BnWriteResIntPair;
+
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from AMD SOG. // FIXME: split
+
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair; // FIXME: consumes 2 resources?
+defm : BnWriteResIntPair; // FIXME: consumes 2 resources?
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair; // FIXME: more complicated than that?
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair; // FIXME: consumes 2 resources?
+defm : BnWriteResIntPair; // FIXME: more complicated than that?
+defm : BnWriteResIntPair;
+defm : X86WriteResUnsupported; // BMI2 MULX
+
+// FIXME: latency not from llvm-exegesis, from AMD SOG.
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+
+// FIXME: latency not from llvm-exegesis, from AMD SOG.
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResIntPair; // Conditional move.
+
+// FIXME: latency/uops not from llvm-exegesis, from AMD SOG.
+defm : BnWriteResFPU; // x87 conditional move.
+
+defm : BnWriteResInt; // FIXME: latency is probably wrong, not accounting for the secondary helper instruction.
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+
+defm : BnWriteResInt; // FIXME: latency is probably wrong, not accounting for the secondary helper instruction. FIXME: split?
+
+defm : BnWriteResInt; // FIXME: latency is probably wrong, not accounting for the secondary helper instruction.
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from Agner.
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from Agner.
+defm : BnWriteResInt;
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from Agner. FIXME: split
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis, from Agner.
+
+// This is for simple LEAs with one or two input operands and no scale.
+// FIXME: with scale and/or 3-operand LEA: lat=2
+defm : BnWriteResInt; // FIXME: latency/uops not from llvm-exegesis, from Agner.
+
+// Bit counts.
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : BnWriteResIntPair;
+defm : X86WriteResPairUnsupported;
+
+// BMI1 BEXTR, BMI2 BZHI
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResIntPair; // FIXME: split
+defm : BnWriteResIntPair; // FIXME: split
+defm : BnWriteResIntPair; // FIXME: split
+defm : BnWriteResIntPair; // FIXME: split
+
+// SHLD/SHRD.
+defm : BnWriteResInt;
+defm : BnWriteResInt;
+
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis
+defm : BnWriteResInt; // FIXME: latency not from llvm-exegesis
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : X86WriteResUnsupported;
+
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : X86WriteResUnsupported;
+
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+
+defm : X86WriteResUnsupported;
+defm : BnWriteResFPU;
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG.
+
+defm : BnWriteResFPUPair; // FIXME: split
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // split
+defm : BnWriteResFPUPair; // split
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // split
+defm : BnWriteResFPUPair; // split
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: latency is probably wrong, not accounting for the secondary helper instruction.
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: latency not from llvm-exegesis, from Agner.
+defm : BnWriteResFPUPair; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: split
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: split
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+////////////////////////////////////////////////////////////////////////////////
+// Conversions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResFPUPair; // FIXME: latency is probably wrong, not accounting for the secondary helper instruction.
+
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: latency is probably wrong, not accounting for the secondary helper instruction.
+
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: Ld is 1 uop *less*
+
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: Ld is 1 uop *less*
+
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair; // FIXME: Ld is 1 uop *less*
+
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector integer operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from Agner.
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU; // FIXME: latency/uops not from llvm-exegesis, from AMD SOG/Agner.
+defm : X86WriteResUnsupported;
+
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner.
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from Agner.
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU; // FIXME: latency not from llvm-exegesis, from AMD SOG/Agner. // FIXME: split
+defm : X86WriteResUnsupported;
+
+defm : X86WriteResUnsupported;
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU;
+defm : BnWriteResFPU;
+defm : X86WriteResUnsupported;
+
+defm : BnWriteResFPU;
+defm : BnWriteResFPU; // FIXME: latency is probably wrong, not accounting for the secondary helper instruction.
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported;
+
+defm : BnWriteResFPUPair;
+defm : BnWriteResFPUPair;
+defm : X86WriteResPairUnsupported;
+defm : X86WriteResPairUnsupported