diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -107,6 +107,9 @@ def HWVALU : ProcResource<1> { let BufferSize = 1; } +def HWTransVALU : ProcResource<1> { // Transcendental VALU + let BufferSize = 1; +} def HWRC : ProcResource<1> { // Register destination cache let BufferSize = 1; } @@ -240,14 +243,14 @@ def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; -def : HWWriteRes; +def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; -def : HWWriteRes; +def : HWWriteRes; def : HWWriteRes; def : HWWriteRes; diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s --- a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s +++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s @@ -34,18 +34,19 @@ # CHECK-NEXT: [2] - HWLGKM # CHECK-NEXT: [3] - HWRC # CHECK-NEXT: [4] - HWSALU -# CHECK-NEXT: [5] - HWVALU -# CHECK-NEXT: [6] - HWVMEM +# CHECK-NEXT: [5] - HWTransVALU +# CHECK-NEXT: [6] - HWVALU +# CHECK-NEXT: [7] - HWVMEM # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] -# CHECK-NEXT: - - - 3.00 - 3.00 - +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - - 3.00 - - 3.00 - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v0, v0, v0 -# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v1, v1, v1 -# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v2, v1, v0 +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f32_e32 v0, v0, v0 +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f32_e32 v1, v1, v1 +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f32_e32 v2, v1, v0 # CHECK: Timeline view: # CHECK-NEXT: 01 diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s --- a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s +++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s @@ -93,43 +93,44 @@ # CHECK-NEXT: [2] - HWLGKM # CHECK-NEXT: [3] - HWRC # CHECK-NEXT: [4] - HWSALU -# CHECK-NEXT: [5] - HWVALU -# CHECK-NEXT: [6] - HWVMEM +# CHECK-NEXT: [5] - HWTransVALU +# CHECK-NEXT: [6] - HWVALU +# CHECK-NEXT: [7] - HWVMEM # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] -# CHECK-NEXT: - - - 29.00 1.00 28.00 - +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - - 29.00 1.00 3.00 28.00 - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_i32_f64_e32 v0, v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_i32_e32 v[2:3], v2 -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f32_f64_e32 v4, v[4:5] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_f32_e32 v[6:7], v6 -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_u32_f64_e32 v8, v[8:9] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cvt_f64_u32_e32 v[10:11], v10 -# CHECK-NEXT: - - - 1.00 - 1.00 - v_frexp_exp_i32_f64_e32 v0, v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_frexp_mant_f64_e32 v[2:3], v[2:3] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_fract_f64_e32 v[4:5], v[4:5] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_trunc_f64_e32 v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_ceil_f64_e32 v[2:3], v[2:3] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_rndne_f64_e32 v[4:5], v[4:5] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_floor_f64_e32 v[6:7], v[6:7] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f64 v[2:3], v[2:3], v[2:3] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_mul_f64 v[4:5], v[4:5], v[4:5] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_min_f64 v[6:7], v[6:7], v[6:7] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_max_f64 v[8:9], v[8:9], v[8:9] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_ldexp_f64 v[2:3], v[2:3], v0 -# CHECK-NEXT: - - - 2.00 1.00 1.00 - v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_trig_preop_f64 v[2:3], v[2:3], v0 -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_cmp_class_f64_e64 vcc_lo, v[2:3], s0 -# CHECK-NEXT: - - - 1.00 - 1.00 - v_rcp_f64_e32 v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_rsq_f64_e32 v[2:3], v[2:3] -# CHECK-NEXT: - - - 1.00 - 1.00 - v_sqrt_f64_e32 v[4:5], v[4:5] +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_i32_f64_e32 v0, v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_i32_e32 v[2:3], v2 +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f32_f64_e32 v4, v[4:5] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_f32_e32 v[6:7], v6 +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_u32_f64_e32 v8, v[8:9] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cvt_f64_u32_e32 v[10:11], v10 +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_exp_i32_f64_e32 v0, v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_frexp_mant_f64_e32 v[2:3], v[2:3] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_fract_f64_e32 v[4:5], v[4:5] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_trunc_f64_e32 v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_ceil_f64_e32 v[2:3], v[2:3] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_rndne_f64_e32 v[4:5], v[4:5] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_floor_f64_e32 v[6:7], v[6:7] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_fma_f64 v[0:1], v[0:1], v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_add_f64 v[2:3], v[2:3], v[2:3] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_mul_f64 v[4:5], v[4:5], v[4:5] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_min_f64 v[6:7], v[6:7], v[6:7] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_max_f64 v[8:9], v[8:9], v[8:9] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fmas_f64 v[0:1], v[0:1], v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_div_fixup_f64 v[0:1], v[0:1], v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_ldexp_f64 v[2:3], v[2:3], v0 +# CHECK-NEXT: - - - 2.00 1.00 - 1.00 - v_div_scale_f64 v[0:1], vcc_lo, v[0:1], v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_trig_preop_f64 v[2:3], v[2:3], v0 +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - - 1.00 - v_cmp_class_f64_e64 vcc_lo, v[2:3], s0 +# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[2:3], v[2:3] +# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[4:5], v[4:5] # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 0 diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-trans.s @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx1010 --timeline --iterations=1 < %s | FileCheck %s + +v_log_f32 v0, v0 +v_rcp_f32 v0, v0 +v_rsq_f32 v1, v1 +v_sqrt_f32 v2, v0 +v_rcp_f64 v[0:1], v[0:1] +v_rsq_f64 v[1:2], v[1:2] +v_sqrt_f64 v[2:3], v[0:1] + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 7 +# CHECK-NEXT: Total Cycles: 94 +# CHECK-NEXT: Total uOps: 7 + +# CHECK: Dispatch Width: 1 +# CHECK-NEXT: uOps Per Cycle: 0.07 +# CHECK-NEXT: IPC: 0.07 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 10 1.00 U v_log_f32_e32 v0, v0 +# CHECK-NEXT: 1 10 1.00 U v_rcp_f32_e32 v0, v0 +# CHECK-NEXT: 1 10 1.00 U v_rsq_f32_e32 v1, v1 +# CHECK-NEXT: 1 10 1.00 U v_sqrt_f32_e32 v2, v0 +# CHECK-NEXT: 1 24 1.00 U v_rcp_f64_e32 v[0:1], v[0:1] +# CHECK-NEXT: 1 24 1.00 U v_rsq_f64_e32 v[1:2], v[1:2] +# CHECK-NEXT: 1 24 1.00 U v_sqrt_f64_e32 v[2:3], v[0:1] + +# CHECK: Resources: +# CHECK-NEXT: [0] - HWBranch +# CHECK-NEXT: [1] - HWExport +# CHECK-NEXT: [2] - HWLGKM +# CHECK-NEXT: [3] - HWRC +# CHECK-NEXT: [4] - HWSALU +# CHECK-NEXT: [5] - HWTransVALU +# CHECK-NEXT: [6] - HWVALU +# CHECK-NEXT: [7] - HWVMEM + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - - 7.00 - 7.00 3.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - - 1.00 - 1.00 - - v_log_f32_e32 v0, v0 +# CHECK-NEXT: - - - 1.00 - 1.00 - - v_rcp_f32_e32 v0, v0 +# CHECK-NEXT: - - - 1.00 - 1.00 - - v_rsq_f32_e32 v1, v1 +# CHECK-NEXT: - - - 1.00 - 1.00 - - v_sqrt_f32_e32 v2, v0 +# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rcp_f64_e32 v[0:1], v[0:1] +# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_rsq_f64_e32 v[1:2], v[1:2] +# CHECK-NEXT: - - - 1.00 - 1.00 1.00 - v_sqrt_f64_e32 v[2:3], v[0:1] + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + +# CHECK: [0,0] DeeeeeeeeeE . . . . . . . . . . . . v_log_f32_e32 v0, v0 +# CHECK-NEXT: [0,1] . . DeeeeeeeeeE . . . . . . . . . . v_rcp_f32_e32 v0, v0 +# CHECK-NEXT: [0,2] . . .DeeeeeeeeeE . . . . . . . . . . v_rsq_f32_e32 v1, v1 +# CHECK-NEXT: [0,3] . . . . DeeeeeeeeeE . . . . . . . . v_sqrt_f32_e32 v2, v0 +# CHECK-NEXT: [0,4] . . . . .DeeeeeeeeeeeeeeeeeeeeeeeE . . . . . v_rcp_f64_e32 v[0:1], v[0:1] +# CHECK-NEXT: [0,5] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeE v_rsq_f64_e32 v[1:2], v[1:2] + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 v_log_f32_e32 v0, v0 +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 v_rcp_f32_e32 v0, v0 +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 v_rsq_f32_e32 v1, v1 +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 v_sqrt_f32_e32 v2, v0 +# CHECK-NEXT: 4. 1 0.0 0.0 0.0 v_rcp_f64_e32 v[0:1], v[0:1] +# CHECK-NEXT: 5. 1 0.0 0.0 0.0 v_rsq_f64_e32 v[1:2], v[1:2] +# CHECK-NEXT: 6. 1 0.0 0.0 0.0 v_sqrt_f64_e32 v[2:3], v[0:1] +# CHECK-NEXT: 1 0.0 0.0 0.0