Index: llvm/trunk/include/llvm/CodeGen/RegisterPressure.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/RegisterPressure.h +++ llvm/trunk/include/llvm/CodeGen/RegisterPressure.h @@ -156,7 +156,7 @@ const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } - void addPressureChange(unsigned RegUnit, bool IsDec, + void addPressureChange(RegisterMaskPair P, bool IsDec, const MachineRegisterInfo *MRI); void dump(const TargetRegisterInfo &TRI) const; Index: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h @@ -30,6 +30,7 @@ class BitVector; class MachineFunction; +class MachineRegisterInfo; class RegScavenger; template class SmallVectorImpl; class VirtRegMap; @@ -719,6 +720,12 @@ /// Get the weight in units of pressure for this register unit. virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0; + /// Get the weight in units of pressure for a sub register of this register + /// unit given a lane mask. + virtual unsigned getRegUnitWeight(const MachineRegisterInfo &MRI, + unsigned RegUnit, + LaneBitmask LaneMask) const; + /// Get the number of dimensions of register pressure. 
virtual unsigned getNumRegPressureSets() const = 0; Index: llvm/trunk/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp @@ -1085,7 +1085,7 @@ continue; PressureDiff &PDiff = getPressureDiff(&SU); - PDiff.addPressureChange(Reg, Decrement, &MRI); + PDiff.addPressureChange(P, Decrement, &MRI); DEBUG( dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) @@ -1123,7 +1123,7 @@ LI.Query(LIS->getInstructionIndex(*SU->getInstr())); if (LRQ.valueIn() == VNI) { PressureDiff &PDiff = getPressureDiff(SU); - PDiff.addPressureChange(Reg, true, &MRI); + PDiff.addPressureChange(P, true, &MRI); DEBUG( dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " << *SU->getInstr(); Index: llvm/trunk/lib/CodeGen/RegisterPressure.cpp =================================================================== --- llvm/trunk/lib/CodeGen/RegisterPressure.cpp +++ llvm/trunk/lib/CodeGen/RegisterPressure.cpp @@ -46,16 +46,29 @@ using namespace llvm; +/// Clamp lane masks to maximum possible value. +static void clampMasks(const MachineRegisterInfo &MRI, unsigned Reg, + LaneBitmask& LaneMask1, LaneBitmask& LaneMask2) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + LaneBitmask Max = MRI.getMaxLaneMaskForVReg(Reg); + LaneMask1 &= Max; + LaneMask2 &= Max; + } +} + /// Increase pressure for each pressure set provided by TargetRegisterInfo. 
static void increaseSetPressure(std::vector &CurrSetPressure, const MachineRegisterInfo &MRI, unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { assert((PrevMask & ~NewMask).none() && "Must not remove bits"); - if (PrevMask.any() || NewMask.none()) + + clampMasks(MRI, Reg, PrevMask, NewMask); + if ((NewMask & ~PrevMask).none()) return; + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + unsigned Weight = TRI->getRegUnitWeight(MRI, Reg, NewMask & ~PrevMask); PSetIterator PSetI = MRI.getPressureSets(Reg); - unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) CurrSetPressure[*PSetI] += Weight; } @@ -65,11 +78,13 @@ const MachineRegisterInfo &MRI, unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { //assert((NewMask & !PrevMask) == 0 && "Must not add bits"); - if (NewMask.any() || PrevMask.none()) + clampMasks(MRI, Reg, PrevMask, NewMask); + if ((~NewMask & PrevMask).none()) return; + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + unsigned Weight = TRI->getRegUnitWeight(MRI, Reg, ~NewMask & PrevMask); PSetIterator PSetI = MRI.getPressureSets(Reg); - unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow"); CurrSetPressure[*PSetI] -= Weight; @@ -139,11 +154,14 @@ void RegPressureTracker::increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask) { - if (PreviousMask.any() || NewMask.none()) + clampMasks(*MRI, RegUnit, PreviousMask, NewMask); + if ((NewMask & ~PreviousMask).none()) return; + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + unsigned Weight = TRI->getRegUnitWeight(*MRI, RegUnit, + NewMask & ~PreviousMask); PSetIterator PSetI = MRI->getPressureSets(RegUnit); - unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { CurrSetPressure[*PSetI] += Weight; P.MaxSetPressure[*PSetI] = @@ -644,17 +662,19 @@ PressureDiff &PDiff = (*this)[Idx]; 
assert(!PDiff.begin()->isValid() && "stale PDiff"); for (const RegisterMaskPair &P : RegOpers.Defs) - PDiff.addPressureChange(P.RegUnit, true, &MRI); + PDiff.addPressureChange(P, true, &MRI); for (const RegisterMaskPair &P : RegOpers.Uses) - PDiff.addPressureChange(P.RegUnit, false, &MRI); + PDiff.addPressureChange(P, false, &MRI); } /// Add a change in pressure to the pressure diff of a given instruction. -void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, +void PressureDiff::addPressureChange(RegisterMaskPair P, bool IsDec, const MachineRegisterInfo *MRI) { - PSetIterator PSetI = MRI->getPressureSets(RegUnit); - int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight(); + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + int Weight = (int)TRI->getRegUnitWeight(*MRI, P.RegUnit, P.LaneMask); + PSetIterator PSetI = MRI->getPressureSets(P.RegUnit); + if (IsDec) Weight = -Weight; for (; PSetI.isValid(); ++PSetI) { // Find an existing entry in the pressure diff for this PSet. PressureDiff::iterator I = nonconst_begin(), E = nonconst_end(); Index: llvm/trunk/lib/CodeGen/TargetRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetRegisterInfo.cpp +++ llvm/trunk/lib/CodeGen/TargetRegisterInfo.cpp @@ -412,6 +412,15 @@ return true; } +/// Get the weight in units of pressure for a sub register of this register +/// unit given a lane mask. 
+unsigned TargetRegisterInfo::getRegUnitWeight(const MachineRegisterInfo &MRI, + unsigned RegUnit, + LaneBitmask LaneMask) const { + PSetIterator PSetI = MRI.getPressureSets(RegUnit); + return PSetI.getWeight(); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h @@ -228,6 +228,10 @@ const int *getRegUnitPressureSets(unsigned RegUnit) const override; + unsigned getRegUnitWeight(const MachineRegisterInfo &MRI, + unsigned RegUnit, + LaneBitmask LaneMask) const override; + private: void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; @@ -1408,3 +1409,18 @@ return Empty; return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit); } + +unsigned SIRegisterInfo::getRegUnitWeight(const MachineRegisterInfo &MRI, + unsigned RegUnit, + LaneBitmask LaneMask) const { + unsigned Weight = TargetRegisterInfo::getRegUnitWeight(MRI, RegUnit, + LaneMask); + if (Weight > 1 && LaneMask.any() && !LaneMask.all() && + isVirtualRegister(RegUnit)) { + LaneBitmask Max = MRI.getMaxLaneMaskForVReg(RegUnit); + if (Max != LaneMask && !Max.all() && !Max.none()) + Weight = (Weight * countPopulation(LaneMask.getAsInteger())) / + countPopulation(Max.getAsInteger()); + } + return Weight; +} Index: llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll 
=================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll +++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll @@ -424,25 +424,25 @@ ; GCN-NOHSA: buffer_store_dwordx4 ; GCN-NOHSA: buffer_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 { %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in Index: llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir +++ llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir @@ -0,0 +1,67 @@ +# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler -verify-misched %s -o - -debug-only=misched 2>&1 | FileCheck %s +# REQUIRES: asserts + +# CHECK-LABEL: ScheduleDAGMILive::schedule starting + +# 
Check that def and use subregs count with the same weight +# CHECK: %vreg9:sub1 = V_MUL_LO_I32 %vreg6:sub1, 3 +# CHECK: Pressure Diff : {{$}} + +# Check that a subreg does not count as a whole superreg +# CHECK: %vreg9:sub0 = V_MUL_LO_I32 %vreg6:sub0, %vreg9:sub1 +# CHECK: Pressure Diff : VGPR_32 1{{$}} + +# Check that two subregs of the same register count as a whole register +# CHECK: DS_WRITE2_B32 %vreg7, %vreg9:sub0, %vreg9:sub1 +# CHECK: Pressure Diff : VGPR_32 3{{$}} + +--- +name: mo_pset +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_128 } + - { id: 1, class: sgpr_64 } + - { id: 2, class: sreg_32_xm0 } + - { id: 3, class: sgpr_32 } + - { id: 4, class: vgpr_32 } + - { id: 5, class: sreg_32_xm0_xexec } + - { id: 6, class: vreg_64 } + - { id: 7, class: vgpr_32 } + - { id: 8, class: vgpr_32 } + - { id: 9, class: vreg_64 } +liveins: + - { reg: '%sgpr4_sgpr5', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0: + liveins: %sgpr4_sgpr5 + + %1 = COPY %sgpr4_sgpr5 + %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`) + %m0 = S_MOV_B32 -1 + %7 = COPY %5 + %6 = DS_READ2_B32 %7, 0, 1, 0, implicit %m0, implicit %exec + undef %9.sub1 = V_MUL_LO_I32 %6.sub1, 3, implicit %exec + %9.sub0 = V_MUL_LO_I32 %6.sub0, %9.sub1, implicit %exec + DS_WRITE2_B32 %7, %9.sub0, %9.sub1, 4, 5, 0, implicit killed %m0, implicit %exec + S_ENDPGM + +...