diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3820,11 +3820,7 @@ ++ConstantBusCount; SmallVector SGPRsUsed; - Register SGPRUsed = findImplicitSGPRRead(MI); - if (SGPRUsed != AMDGPU::NoRegister) { - ++ConstantBusCount; - SGPRsUsed.push_back(SGPRUsed); - } + Register SGPRUsed; for (int OpIdx : OpIndices) { if (OpIdx == -1) @@ -3833,8 +3829,8 @@ if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { if (MO.isReg()) { SGPRUsed = MO.getReg(); - if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) { - return !RI.regsOverlap(SGPRUsed, SGPR); + if (llvm::all_of(SGPRsUsed, [SGPRUsed](unsigned SGPR) { + return SGPRUsed != SGPR; })) { ++ConstantBusCount; SGPRsUsed.push_back(SGPRUsed); @@ -3845,6 +3841,18 @@ } } } + + SGPRUsed = findImplicitSGPRRead(MI); + if (SGPRUsed != AMDGPU::NoRegister) { + // Implicit uses may safely overlap true operands + if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) { + return !RI.regsOverlap(SGPRUsed, SGPR); + })) { + ++ConstantBusCount; + SGPRsUsed.push_back(SGPRUsed); + } + } + // v_writelane_b32 is an exception from constant bus restriction: // vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&