Index: lib/CodeGen/PeepholeOptimizer.cpp
===================================================================
--- lib/CodeGen/PeepholeOptimizer.cpp
+++ lib/CodeGen/PeepholeOptimizer.cpp
@@ -1808,7 +1808,7 @@
   assert(Def->isCopy() && "Invalid definition");
   // Copy instruction are supposed to be: Def = Src.
   // If someone breaks this assumption, bad things will happen everywhere.
-  assert(Def->getNumOperands() == 2 && "Invalid number of operands");
+  assert(Def->getNumExplicitOperands() == 2 && "Invalid number of operands");
 
   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
     // If we look for a different subreg, it means we want a subreg of src.
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -499,6 +499,11 @@
         return;
 
       UseMI->setDesc(TII->get(MovOp));
+      // Strip every implicit operand now that the opcode has been replaced.
+      // Remove from the back so the remaining operand indices stay valid.
+      unsigned NumExplicit = UseMI->getNumExplicitOperands();
+      for (unsigned I = UseMI->getNumOperands(); I > NumExplicit; --I)
+        UseMI->RemoveOperand(I - 1);
       CopiesToReplace.push_back(UseMI);
     } else {
       if (UseMI->isCopy() && OpToFold.isReg() &&
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3919,6 +3919,19 @@
   // Try to eliminate the copy if it is copying an immediate value.
   if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
     FoldImmediate(*Copy, *Def, OpReg, &MRI);
+
+  // Follow the chain of COPYs feeding this one and note whether it ends in an
+  // IMPLICIT_DEF.
+  bool ImpDef = Def->isImplicitDef();
+  while (!ImpDef && Def && Def->isCopy()) {
+    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
+    ImpDef = Def && Def->isImplicitDef();
+  }
+  // Add an implicit use of EXEC unless the destination is an SGPR class, the
+  // copy already reads EXEC, or the source is ultimately an IMPLICIT_DEF.
+  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
+      !ImpDef)
+    Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
 }
 
 // Emit the actual waterfall loop, executing the wrapped instruction for each
Index: test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
===================================================================
--- test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -882,7 +882,7 @@
 ---
 
 # GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
-# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
 # GCN-NEXT: S_ENDPGM 0, implicit %3
 
 name: constant_fold_lshl_or_reg0_immreg_immreg