Index: include/llvm/CodeGen/RegisterPressure.h =================================================================== --- include/llvm/CodeGen/RegisterPressure.h +++ include/llvm/CodeGen/RegisterPressure.h @@ -143,7 +143,7 @@ const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } - void addPressureChange(unsigned RegUnit, bool IsDec, + void addPressureChange(RegisterMaskPair P, bool IsDec, const MachineRegisterInfo *MRI); void dump(const TargetRegisterInfo &TRI) const; Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -1046,7 +1046,7 @@ continue; PressureDiff &PDiff = getPressureDiff(&SU); - PDiff.addPressureChange(Reg, Decrement, &MRI); + PDiff.addPressureChange(P, Decrement, &MRI); DEBUG( dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) @@ -1084,7 +1084,7 @@ LI.Query(LIS->getInstructionIndex(*SU->getInstr())); if (LRQ.valueIn() == VNI) { PressureDiff &PDiff = getPressureDiff(SU); - PDiff.addPressureChange(Reg, true, &MRI); + PDiff.addPressureChange(P, true, &MRI); DEBUG( dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " << *SU->getInstr(); Index: lib/CodeGen/RegisterPressure.cpp =================================================================== --- lib/CodeGen/RegisterPressure.cpp +++ lib/CodeGen/RegisterPressure.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -621,17 +622,25 @@ PressureDiff &PDiff = (*this)[Idx]; assert(!PDiff.begin()->isValid() && "stale PDiff"); for (const RegisterMaskPair &P : RegOpers.Defs) - PDiff.addPressureChange(P.RegUnit, true, &MRI); + PDiff.addPressureChange(P, true, &MRI); for (const RegisterMaskPair &P : RegOpers.Uses) - PDiff.addPressureChange(P.RegUnit, false, &MRI); + PDiff.addPressureChange(P, false, &MRI); } /// Add a change in pressure to the pressure diff of a given instruction. -void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, +void PressureDiff::addPressureChange(RegisterMaskPair P, bool IsDec, const MachineRegisterInfo *MRI) { - PSetIterator PSetI = MRI->getPressureSets(RegUnit); - int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight(); + PSetIterator PSetI = MRI->getPressureSets(P.RegUnit); + int Weight = PSetI.getWeight(); + if (Weight > 1 && P.LaneMask.any() && !P.LaneMask.all() && + TargetRegisterInfo::isVirtualRegister(P.RegUnit)) { + LaneBitmask Max = MRI->getMaxLaneMaskForVReg(P.RegUnit); + if (Max != P.LaneMask && !Max.all() && !Max.none()) + Weight = (Weight * countPopulation(P.LaneMask.getAsInteger())) / + countPopulation(Max.getAsInteger()); + } + if (IsDec) Weight = -Weight; for (; PSetI.isValid(); ++PSetI) { // Find an existing entry in the pressure diff for this PSet. PressureDiff::iterator I = nonconst_begin(), E = nonconst_end(); Index: test/CodeGen/AMDGPU/schedule-regpressure.mir =================================================================== --- test/CodeGen/AMDGPU/schedule-regpressure.mir +++ test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -1,12 +1,23 @@ # RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler %s -o - -debug-only=misched 2>&1 | FileCheck %s # REQUIRES: asserts +# CHECK-LABEL: ScheduleDAGMILive::schedule starting + # Check there is no SReg_32 pressure created by DS_* instructions because of M0 use +# CHECK: DS_READ2_B32 {{.*}} %M0, %EXEC +# CHECK: Pressure Diff : VGPR_32 -1{{$}} -# CHECK: ScheduleDAGMILive::schedule starting -# CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %M0, %EXEC +# Check that def and use subregs count with the same weight +# CHECK: %vreg9:sub1 = V_MUL_LO_I32 %vreg6:sub1, 3 # CHECK: Pressure Diff : {{$}} -# CHECK: SU({{.*}} DS_WRITE_B32 + +# Check that a subreg does not count as a whole superreg +# CHECK: %vreg9:sub0 = V_MUL_LO_I32 %vreg6:sub0, %vreg9:sub1 +# CHECK: Pressure Diff : VGPR_32 1{{$}} + +# Check that two subregs of the same register count as a whole register +# CHECK: DS_WRITE2_B32 %vreg7, %vreg9:sub0, %vreg9:sub1 +# CHECK: Pressure Diff : VGPR_32 3{{$}} --- name: mo_pset @@ -23,9 +34,10 @@ - { id: 3, class: sgpr_32 } - { id: 4, class: vgpr_32 } - { id: 5, class: sreg_32_xm0_xexec } - - { id: 6, class: vgpr_32 } + - { id: 6, class: vreg_64 } - { id: 7, class: vgpr_32 } - { id: 8, class: vgpr_32 } + - { id: 9, class: vreg_64 } liveins: - { reg: '%sgpr4_sgpr5', virtual-reg: '%1' } frameInfo: @@ -50,8 +62,10 @@ %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`) %m0 = S_MOV_B32 -1 %7 = COPY %5 - %6 = DS_READ_B32 %7, 0, 0, implicit %m0, implicit %exec - DS_WRITE_B32 %7, %6, 4, 0, implicit killed %m0, implicit %exec + %6 = DS_READ2_B32 %7, 0, 1, 0, implicit %m0, implicit %exec + undef %9.sub1 = V_MUL_LO_I32 %6.sub1, 3, implicit %exec + %9.sub0 = V_MUL_LO_I32 %6.sub0, %9.sub1, implicit %exec + DS_WRITE2_B32 %7, %9.sub0, %9.sub1, 4, 5, 0, implicit killed %m0, implicit %exec S_ENDPGM ...