diff --git a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp --- a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -21,8 +21,8 @@ RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - NumROBEntries(SM.MicroOpBufferSize), - AvailableEntries(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + AvailableEntries(SM.isOutOfOrder() ? SM.MicroOpBufferSize : 0), + MaxRetirePerCycle(0) { // Check if the scheduling model provides extra information about the machine // processor. If so, then use that information to set the reorder buffer size // and the maximum number of instructions retired per cycle. @@ -33,8 +33,7 @@ MaxRetirePerCycle = EPI.MaxRetirePerCycle; } NumROBEntries = AvailableEntries; - bool IsOutOfOrder = SM.MicroOpBufferSize; - if (!IsOutOfOrder && !NumROBEntries) + if (!SM.isOutOfOrder() && !NumROBEntries) return; assert(NumROBEntries && "Invalid reorder buffer size!"); Queue.resize(2 * NumROBEntries); diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx1010 --timeline --iterations=1 < %s | FileCheck %s + +v_add_f32 v0, v0, v0 +v_add_f32 v1, v1, v1 +v_add_f32 v2, v1, v0 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 13 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: Dispatch Width: 1 +# CHECK-NEXT: uOps Per Cycle: 0.23 +# CHECK-NEXT: IPC: 0.23 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 U v_add_f32_e32 v0, v0, v0 +# CHECK-NEXT: 1 5 1.00 U v_add_f32_e32 v1, v1, v1 +# CHECK-NEXT: 1 5 1.00 U v_add_f32_e32 v2, v1, v0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - HWBranch +# CHECK-NEXT: [1] - HWExport +# CHECK-NEXT: [2] - HWLGKM +# CHECK-NEXT: [3] - HWRC +# CHECK-NEXT: [4] - HWSALU +# CHECK-NEXT: [5] - HWVALU +# CHECK-NEXT: [6] - HWVMEM + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] +# CHECK-NEXT: - - - 3.00 - 3.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v0, v0, v0 +# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v1, v1, v1 +# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v2, v1, v0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . . v_add_f32_e32 v0, v0, v0 +# CHECK-NEXT: [0,1] .DeeeeER . . v_add_f32_e32 v1, v1, v1 +# CHECK-NEXT: [0,2] . .DeeeeER v_add_f32_e32 v2, v1, v0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 v_add_f32_e32 v0, v0, v0 +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 v_add_f32_e32 v1, v1, v1 +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 v_add_f32_e32 v2, v1, v0 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/AMDGPU/lit.local.cfg b/llvm/test/tools/llvm-mca/AMDGPU/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True