Index: llvm/include/llvm/CodeGen/RegisterBankInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/RegisterBankInfo.h
+++ llvm/include/llvm/CodeGen/RegisterBankInfo.h
@@ -435,7 +435,8 @@
 
   /// Get the MinimalPhysRegClass for Reg.
   /// \pre Reg is a physical register.
-  const TargetRegisterClass &
+  /// \returns the class reported by \p TRI; may be null, so callers must check.
+  const TargetRegisterClass *
   getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const;
 
   /// Try to get the mapping of \p MI.
Index: llvm/lib/CodeGen/RegisterBankInfo.cpp
===================================================================
--- llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -80,12 +80,17 @@
 RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
                              const TargetRegisterInfo &TRI) const {
+  // Reject NoRegister up front: it is not a physical register, so an assert
+  // inside the physical-register branch below could never fire.
+  assert(Reg && "NoRegister does not have a register bank");
+
   if (Reg.isPhysical()) {
     // FIXME: This was probably a copy to a virtual register that does have a
     // type we could use.
-    return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
+    // A physreg without a minimal register class has no bank to report.
+    const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg, TRI);
+    return RC ? &getRegBankFromRegClass(*RC, LLT()) : nullptr;
   }
 
-  assert(Reg && "NoRegister does not have a register bank");
   const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
   if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
     return RB;
@@ -94,16 +99,16 @@
   return nullptr;
 }
 
-const TargetRegisterClass &
+const TargetRegisterClass *
 RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
                                          const TargetRegisterInfo &TRI) const {
   assert(Reg.isPhysical() && "Reg must be a physreg");
   const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
   if (RegRCIt != PhysRegMinimalRCs.end())
-    return *RegRCIt->second;
-  const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg);
+    return RegRCIt->second;
+  const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClassLLT(Reg, LLT());
   PhysRegMinimalRCs[Reg] = PhysRC;
-  return *PhysRC;
+  return PhysRC;
 }
 
 const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
@@ -498,7 +503,7 @@
     // Instead, we need to access a register class that contains Reg and
     // get the size of that register class.
     // Because this is expensive, we'll cache the register class by calling
-    auto *RC = &getMinimalPhysRegClass(Reg, TRI);
+    auto *RC = getMinimalPhysRegClass(Reg, TRI);
     assert(RC && "Expecting Register class");
     return TRI.getRegSizeInBits(*RC);
   }
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
 
 #include "AMDGPUCallLowering.h"
+#include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "SIFrameLowering.h"
 #include "SIISelLowering.h"
@@ -51,7 +52,7 @@
   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
   std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+  std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
 
 protected:
   // Basic subtarget description.
@@ -246,7 +247,7 @@
     return Legalizer.get();
   }
 
-  const RegisterBankInfo *getRegBankInfo() const override {
+  const AMDGPURegisterBankInfo *getRegBankInfo() const override {
     return RegBankInfo.get();
   }
 
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8404,6 +8404,25 @@
 
 InstructionUniformity
 SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
+  // A COPY takes its uniformity from a physical source register: an SGPR
+  // source is always uniform, any other class is never uniform. Every other
+  // COPY falls back to the default.
+  if (MI.isCopy()) {
+    const MachineOperand &CopySrc = MI.getOperand(1);
+    if (!CopySrc.isReg() || !CopySrc.getReg().isPhysical())
+      return InstructionUniformity::Default;
+
+    const TargetRegisterClass *SrcRC =
+        RI.getPhysRegBaseClass(CopySrc.getReg());
+    if (RI.isSGPRClass(SrcRC))
+      return InstructionUniformity::AlwaysUniform;
+    return InstructionUniformity::NeverUniform;
+  }
+
+  // Pre-isel (generic MIR) opcodes are answered by the generic query.
+  if (MI.isPreISelOpcode())
+    return SIInstrInfo::getGenericInstructionUniformity(MI);
+
   // Atomics are divergent because they are executed sequentially: when an
   // atomic operation refers to the same address in each thread, then each
   // thread after the first sees the value written by the previous thread as
@@ -8430,49 +8449,29 @@
     return InstructionUniformity::Default;
   }
 
   unsigned opcode = MI.getOpcode();
-  if (opcode == AMDGPU::COPY) {
-    const MachineOperand &srcOp = MI.getOperand(1);
-    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
-      return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
-                                      : InstructionUniformity::NeverUniform;
-    }
-    return InstructionUniformity::Default;
-  }
-  if (opcode == AMDGPU::INLINEASM || opcode == AMDGPU::INLINEASM_BR) {
-    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-    for (auto &op : MI.operands()) {
-      if (!op.isReg() || !op.isDef())
-        continue;
-      auto *RC = MRI.getRegClass(op.getReg());
-      if (!RC || RI.isDivergentRegClass(RC))
-        return InstructionUniformity::NeverUniform;
-    }
-    return InstructionUniformity::AlwaysUniform;
-  }
   if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
     return InstructionUniformity::AlwaysUniform;
 
   if (opcode == AMDGPU::V_WRITELANE_B32)
     return InstructionUniformity::NeverUniform;
 
-  // GMIR handling
-  if (SIInstrInfo::isGenericOpcode(opcode))
-    return SIInstrInfo::getGenericInstructionUniformity(MI);
-
-  // Handling $vpgr reads
-  for (auto srcOp : MI.operands()) {
-    if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
-
-      // If the class is missing it's an unallocatable scalar of some kind.
-      if (!regClass)
-        continue;
-
-      if (RI.isVGPRClass(regClass))
-        return InstructionUniformity::NeverUniform;
-    }
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
+
+  // FIXME: It's conceptually broken to report this for an instruction, and not
+  // a specific def operand. For inline asm in particular, there could be mixed
+  // uniform and divergent results.
+  for (const MachineOperand &MO : MI.operands()) {
+    // Only register operands that are actually read take part in the check.
+    if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+      continue;
+
+    // A null bank means the register is unassigned or an unallocatable special
+    // register, which are all scalars.
+    const RegisterBank *Bank = RBI->getRegBank(MO.getReg(), MRI, RI);
+    if (Bank && Bank->getID() != AMDGPU::SGPRRegBankID)
+      return InstructionUniformity::NeverUniform;
   }
 
   // TODO: Uniformity check condtions above can be rearranged for more