diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1599,6 +1599,9 @@
     return false;
 
   unsigned OpIdx = Op - &UseMI->getOperand(0);
+  if (TII->isVariadicOrInvalidRegClassAtOpIdx(*UseMI, OpIdx))
+    return false;
+
   const MCInstrDesc &InstDesc = UseMI->getDesc();
   if (!TRI->isVectorSuperClass(
           TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -868,6 +868,14 @@
 
   unsigned getVALUOp(const MachineInstr &MI) const;
 
+  /// Return true if the register class of operand \p OpIdx of \p MI cannot
+  /// be taken from the instruction description, i.e. when \p MI is variadic,
+  /// when \p OpIdx is not below the operand count of the description, or
+  /// when the description records no register class (-1) for that operand.
+  /// The conditions are evaluated in that order.
+  bool isVariadicOrInvalidRegClassAtOpIdx(const MachineInstr &MI,
+                                          unsigned OpIdx) const;
+
   /// Return the correct register class for \p OpNo. For target-specific
   /// instructions, this will return the register class that has been defined
   /// in tablegen. For generic instructions, like REG_SEQUENCE it will return
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4755,12 +4755,17 @@
   return RI.getRegClass(RegClass);
 }
 
+bool SIInstrInfo::isVariadicOrInvalidRegClassAtOpIdx(const MachineInstr &MI,
+                                                     unsigned OpIdx) const {
+  const MCInstrDesc &Desc = get(MI.getOpcode());
+  return MI.isVariadic() || OpIdx >= Desc.getNumOperands() ||
+         Desc.OpInfo[OpIdx].RegClass == -1;
+}
+
 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                       unsigned OpNo) const {
   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-  const MCInstrDesc &Desc = get(MI.getOpcode());
-  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
-      Desc.OpInfo[OpNo].RegClass == -1) {
+  if (isVariadicOrInvalidRegClassAtOpIdx(MI, OpNo)) {
     Register Reg = MI.getOperand(OpNo).getReg();
 
     if (Reg.isVirtual())
@@ -4768,6 +4773,7 @@
       return RI.getPhysRegClass(Reg);
   }
 
+  const MCInstrDesc &Desc = get(MI.getOpcode());
   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
   RCID = adjustAllocatableRegClass(ST, MRI, Desc, RCID, true);
   return RI.getRegClass(RCID);
diff --git a/llvm/test/CodeGen/AMDGPU/skip-fold-regsequence-with-undef-regclass-opnd-users.mir b/llvm/test/CodeGen/AMDGPU/skip-fold-regsequence-with-undef-regclass-opnd-users.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/skip-fold-regsequence-with-undef-regclass-opnd-users.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
+
+# Skip folding a regsequence when it has users whose operands' regclasses can't be fully determined from its instruction description.
+---
+name: regsequence_with_regsequence_use_op
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $agpr0, $agpr1
+
+    ; GCN-LABEL: name: regsequence_with_regsequence_use_op
+    ; GCN: liveins: $agpr0, $agpr1
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $agpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $agpr1
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF]], %subreg.sub2
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    %0:vgpr_32 = COPY $agpr0
+    %1:vgpr_32 = COPY $agpr1
+    %2:vreg_64_align2 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, %1:vgpr_32, %subreg.sub1
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vreg_96_align2 = REG_SEQUENCE killed %2:vreg_64_align2, %subreg.sub0_sub1, killed %3:vgpr_32, %subreg.sub2
+    S_ENDPGM 0, implicit %4
+...
+---
+name: insert_subreg_with_regsequence_use_op
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $agpr0, $agpr1
+
+    ; GCN-LABEL: name: insert_subreg_with_regsequence_use_op
+    ; GCN: liveins: $agpr0, $agpr1
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $agpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $agpr1
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GCN-NEXT: S_NOP 0, implicit-def %3
+    ; GCN-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64_align2 = INSERT_SUBREG %3, [[REG_SEQUENCE]], %subreg.sub0_sub1
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr_32 = COPY $agpr0
+    %1:vgpr_32 = COPY $agpr1
+    %2:vreg_64_align2 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, %1:vgpr_32, %subreg.sub1
+    S_NOP 0, implicit-def %3:vreg_64_align2
+    %4:vreg_64_align2 = INSERT_SUBREG %3, %2, %subreg.sub0_sub1
+    S_ENDPGM 0, implicit %4
+...