Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -668,7 +668,6 @@ } else { if (UseMI->isCopy() && OpToFold.isReg() && UseMI->getOperand(0).getReg().isVirtual() && - TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) && !UseMI->getOperand(1).getSubReg()) { LLVM_DEBUG(dbgs() << "Folding " << OpToFold << "\n into " << *UseMI << '\n'); Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3921,20 +3921,18 @@ ? MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); - RC = TRI->getSubRegClass(RC, MO.getSubReg()); - - // In order to be legal, the common sub-class must be equal to the - // class of the current operand. 
For example: - // - // v_mov_b32 s0 ; Operand defined as vsrc_b32 - // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL - // - // s_sendmsg 0, s0 ; Operand defined as m0reg - // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL + const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass); + if (MO.getSubReg()) { + const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); + if (!SuperRC) + return false; - return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; + DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); + if (!DRC) + return false; + } + return RC->hasSuperClassEq(DRC); } bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, Index: llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir @@ -0,0 +1,48 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- + +# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_full +# GCN: %0:sgpr_32 = IMPLICIT_DEF +# GCN-NEXT: S_STORE_DWORD_IMM %0, undef $sgpr10_sgpr11, 0, 0, 0 + +name: fold_sgpr_to_sgpr_copy_full +body: | + bb.0: + + %0:sgpr_32 = IMPLICIT_DEF + %1:sgpr_32 = COPY %0 + %2:sgpr_32 = COPY %1 + S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0 +... + +# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg +# GCN: %0:sreg_64 = IMPLICIT_DEF +# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0 +# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0 + +name: fold_sgpr_to_sgpr_copy_subreg +body: | + bb.0: + + %0:sreg_64 = IMPLICIT_DEF + %1:sgpr_32 = COPY %0.sub0 + %2:sgpr_32 = COPY %1 + S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0 +... 
+ +# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2 +# GCN: %0:sreg_64 = IMPLICIT_DEF +# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0 +# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0 + +name: fold_sgpr_to_sgpr_copy_subreg2 +body: | + bb.0: + + %0:sreg_64 = IMPLICIT_DEF + %1:sgpr_32 = COPY %0.sub0 + %2:sgpr_32 = COPY %1 + %3:sreg_32_xm0_xexec = COPY %2 + S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0 +...