diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -36,10 +36,10 @@ const RegPressureTracker &RPTracker, SchedCandidate &Cand); - void initCandidate(SchedCandidate &Cand, SUnit *SU, - bool AtTop, const RegPressureTracker &RPTracker, - const SIRegisterInfo *SRI, - unsigned SGPRPressure, unsigned VGPRPressure); + void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, + const RegPressureTracker &RPTracker, + const SIRegisterInfo *SRI, unsigned SGPRPressure, + unsigned VGPRPressure); std::vector Pressure; std::vector MaxPressure; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -59,15 +59,10 @@ VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit); } -void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, - bool AtTop, const RegPressureTracker &RPTracker, - const SIRegisterInfo *SRI, - unsigned SGPRPressure, - unsigned VGPRPressure) { - - Cand.SU = SU; - Cand.AtTop = AtTop; - +void GCNMaxOccupancySchedStrategy::initCandidate( + SchedCandidate &Cand, SUnit *SU, bool AtTop, + const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, + unsigned SGPRPressure, unsigned VGPRPressure) { // getDownwardPressure() and getUpwardPressure() make temporary changes to // the tracker, so we need to pass those function a non-const copy. RegPressureTracker &TempTracker = const_cast(RPTracker); @@ -150,14 +145,17 @@ SchedCandidate &Cand) { const SIRegisterInfo *SRI = static_cast(TRI); ArrayRef Pressure = RPTracker.getRegSetPressureAtPos(); - unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; - unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { - SchedCandidate TryCand(ZonePolicy); - initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, - SGPRPressure, VGPRPressure); + TryCand.SU = SU; + TryCand.AtTop = Zone.isTop(); + + if (!Pressure.empty()) + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, + Pressure[AMDGPU::RegisterPressureSets::SReg_32], + Pressure[AMDGPU::RegisterPressureSets::VGPR_32]); + // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -510,25 +510,23 @@ ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 ; GPRIDX-NEXT: s_mov_b32 s2, s4 -; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 -; GPRIDX-NEXT: s_mov_b32 s5, s7 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 -; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GPRIDX-NEXT: s_mov_b32 s6, s8 -; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s19, s9 ; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GPRIDX-NEXT: s_mov_b32 s8, s10 ; GPRIDX-NEXT: s_mov_b32 s9, s11 ; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s19 ; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 @@ -564,25 +562,23 @@ ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 ; MOVREL-NEXT: s_mov_b32 s2, s4 -; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 -; MOVREL-NEXT: s_mov_b32 s5, s7 ; MOVREL-NEXT: v_mov_b32_e32 v1, s0 ; MOVREL-NEXT: v_mov_b32_e32 v2, s1 ; MOVREL-NEXT: v_mov_b32_e32 v3, s2 -; MOVREL-NEXT: v_mov_b32_e32 v4, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s5 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; MOVREL-NEXT: s_mov_b32 s6, s8 -; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s19, s9 ; MOVREL-NEXT: v_mov_b32_e32 v5, s4 -; MOVREL-NEXT: v_mov_b32_e32 v6, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s7 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 ; MOVREL-NEXT: v_mov_b32_e32 v7, s6 -; MOVREL-NEXT: v_mov_b32_e32 v8, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s19 ; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 @@ -2509,23 +2505,21 @@ ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_mov_b32_e32 v3, s2 -; GCN-NEXT: v_mov_b32_e32 v4, s3 +; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s15, s9 ; GCN-NEXT: v_mov_b32_e32 v5, s4 -; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v7, s6 -; GCN-NEXT: v_mov_b32_e32 v8, s7 +; GCN-NEXT: v_mov_b32_e32 v8, s15 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 @@ -2761,25 +2755,23 @@ ; GCN-NEXT: s_mov_b32 s0, s2 ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_mov_b32_e32 v3, s2 -; GCN-NEXT: v_mov_b32_e32 v4, s3 +; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s19, s9 ; GCN-NEXT: v_mov_b32_e32 v5, s4 -; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b32 s8, s10 ; GCN-NEXT: s_mov_b32 s9, s11 ; GCN-NEXT: v_mov_b32_e32 v7, s6 -; GCN-NEXT: v_mov_b32_e32 v8, s7 +; GCN-NEXT: v_mov_b32_e32 v8, s19 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 @@ -3551,40 +3543,38 @@ ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mov_b32_e32 v2, s1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: v_mov_b32_e32 v3, s2 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: v_mov_b32_e32 v4, s3 +; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: s_mov_b32 s5, s7 ; GCN-NEXT: v_mov_b32_e32 v5, s4 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s19, s9 ; GCN-NEXT: v_mov_b32_e32 v7, s6 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 ; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: v_mov_b32_e32 v8, s7 +; GCN-NEXT: v_mov_b32_e32 v8, s19 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 ; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: v_mov_b32_e32 v9, s8 +; GCN-NEXT: v_mov_b32_e32 v10, s8 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 ; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: v_mov_b32_e32 v10, s9 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GCN-NEXT: v_mov_b32_e32 v9, s9 +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 ; GCN-NEXT: v_mov_b32_e32 v11, s10 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 ; GCN-NEXT: v_mov_b32_e32 v12, s13 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc @@ -3625,7 +3615,7 @@ ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 ; GFX10PLUS-NEXT: s_mov_b32 s13, s15 -; GFX10PLUS-NEXT: s_mov_b32 s14, s16 +; GFX10PLUS-NEXT: s_mov_b32 s18, s16 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo @@ -3646,7 +3636,7 @@ ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s14, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s18, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir --- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir +++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s +# RUN: llc -march=amdgcn -misched-regpressure=false -run-pass machine-scheduler %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s --check-prefix=NORP # REQUIRES: asserts # Check there is no SReg_32 pressure created by DS_* instructions because of M0 use @@ -8,6 +9,19 @@ # CHECK: Pressure Diff : {{$}} # CHECK: SU({{.*}} DS_WRITE_B32 +# NORP: ScheduleDAGMILive::schedule starting +# NORP: GenericScheduler RegionPolicy: ShouldTrackPressure=0 +# NORP-NOT: Pressure Diff : {{$}} +# NORP-NOT: Pressure Diff : {{$}} +# NORP-NOT: Pressure Diff : {{$}} +# NORP-NOT: Pressure Diff : {{$}} +# NORP-NOT: Pressure Diff : {{$}} +# NORP-NOT: Pressure Diff : {{$}} +# NORP-NOT: Bottom Pressure: +# NORP-NOT: UpdateRegP +# NORP-NOT: UpdateRegP +# NORP-NOT: UpdateRegP + --- name: mo_pset alignment: 1