Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -503,11 +503,11 @@ for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) { MachineOperand *Sub = &Def->getOperand(I); - assert (Sub->isReg()); + assert(Sub->isReg()); for (MachineInstr *SubDef = MRI.getVRegDef(Sub->getReg()); - SubDef && Sub->isReg() && !Sub->getSubReg() && - TII->isFoldableCopy(*SubDef); + SubDef && Sub->isReg() && Sub->getReg().isVirtual() && + !Sub->getSubReg() && TII->isFoldableCopy(*SubDef); SubDef = MRI.getVRegDef(Sub->getReg())) { MachineOperand *Op = &SubDef->getOperand(1); if (Op->isImm()) { @@ -515,7 +515,7 @@ Sub = Op; break; } - if (!Op->isReg()) + if (!Op->isReg() || Op->getReg().isPhysical()) break; Sub = Op; } Index: llvm/test/CodeGen/AMDGPU/swdev282079.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/swdev282079.mir @@ -0,0 +1,102 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -o - %s | FileCheck %s + +# This was attempting to look back through the REG_SEQUENCE source +# operands and trying to look for physreg defs. 
+# Both REG_SEQUENCE inputs are COPYs of physical registers ($vgpr0, $vgpr1); the CHECK lines mirror the input.
+---
+name: fold_reg_sequence_of_copy_from_physreg_0
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy: 8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_0
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; CHECK: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
+    %3:vreg_64_align2 = IMPLICIT_DEF
+    FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    S_ENDPGM 0
+
+...
+# sub0 is a COPY of $vgpr0; sub1 is a COPY of that copy. NOTE(review): %1 is unused and has no CHECK line - confirm intended.
+---
+name: fold_reg_sequence_of_copy_from_physreg_1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy: 8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_1
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; CHECK: FLAT_STORE_DWORDX2 killed [[REG_SEQUENCE]], killed [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; CHECK: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %2:vgpr_32 = COPY %0
+    %3:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %2, %subreg.sub1
+    %4:vreg_64_align2 = IMPLICIT_DEF
+    FLAT_STORE_DWORDX2 killed %3, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    S_ENDPGM 0
+
+...
+# sub0 comes from an immediate V_MOV_B32_e32 and sub1 from a COPY of $vgpr0; the CHECK lines mirror the input.
+---
+name: fold_reg_sequence_of_copy_from_physreg_2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy: 8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_2
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[COPY]], %subreg.sub1
+    ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; CHECK: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    %0:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %1:vgpr_32 = COPY $vgpr0
+    %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
+    %3:vreg_64_align2 = IMPLICIT_DEF
+    FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    S_ENDPGM 0
+
+...