Index: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
@@ -55,7 +55,6 @@
 def ZnFPU3 : ProcResource<1>;
 
 // FPU grouping
-def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>;
 def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
 def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
 def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
@@ -91,6 +90,32 @@
 // 4 Cycles load-to use Latency is captured
 def : ReadAdvance<ReadAfterLd, 4>;
 
+// The Integer PRF for Zen is 168 entries, and it holds the architectural and
+// speculative version of the 64-bit integer registers.
+// Reference: "Software Optimization Guide for AMD Family 17h Processors"
+def ZnIntegerPRF : RegisterFile<168, [GR8, GR16, GR32, GR64, CCR]>;
+
+// 36 Entry (9x4 entries) floating-point Scheduler
+def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> {
+let BufferSize=36;
+}
+
+// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
+// registers. Operations on 256-bit data types are cracked into two COPs.
+// Reference: "Software Optimization Guide for AMD Family 17h Processors"
+def ZnFpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The unit can track up to 192 macro ops in-flight.
+// The retire unit handles in-order commit of up to 8 macro ops per cycle.
+// Reference: "Software Optimization Guide for AMD Family 17h Processors"
+// To be noted, the retire unit is shared between integer and FP ops.
+// In SMT mode it is 96 entry per thread. But, we do not use the conservative
+// value here because there is currently no way to fully model the SMT mode,
+// so there is no point in trying.
+def ZnRCU : RetireControlUnit<192, 8>;
+
+// FIXME: there are 72 read buffers and 44 write buffers.
+
 // (a folded load is an instruction that loads and does some operation)
 // Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
 // Instructions with folded loads are usually micro-fused, so they only appear
Index: llvm/trunk/test/tools/llvm-mca/X86/register-file-statistics.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/register-file-statistics.s
+++ llvm/trunk/test/tools/llvm-mca/X86/register-file-statistics.s
@@ -21,7 +21,17 @@
 # BTVER2-NEXT: Total number of mappings created: 0
 # BTVER2-NEXT: Max number of mappings used: 0
 
+# ZNVER1: * Register File #1 -- ZnFpuPRF:
+# ZNVER1-NEXT: Number of physical registers: 160
+# ZNVER1-NEXT: Total number of mappings created: 0
+# ZNVER1-NEXT: Max number of mappings used: 0
+
 # BTVER2: * Register File #2 -- JIntegerPRF:
 # BTVER2-NEXT: Number of physical registers: 64
 # BTVER2-NEXT: Total number of mappings created: 2
 # BTVER2-NEXT: Max number of mappings used: 2
+
+# ZNVER1: * Register File #2 -- ZnIntegerPRF:
+# ZNVER1-NEXT: Number of physical registers: 168
+# ZNVER1-NEXT: Total number of mappings created: 2
+# ZNVER1-NEXT: Max number of mappings used: 2
Index: llvm/trunk/test/tools/llvm-mca/X86/scheduler-queue-usage.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ llvm/trunk/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -49,3 +49,4 @@
 # ZNVER1: Scheduler's queue usage:
 # ZNVER1-NEXT: ZnAGU, 0/28
 # ZNVER1-NEXT: ZnALU, 1/56
+# ZNVER1-NEXT: ZnFPU, 0/36