Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4380,9 +4380,19 @@ if (Op.isReg()) { const MachineInstr *Def = MRI.getUniqueVRegDef(Op.getReg()); - if (!Def || !Def->isMoveImmediate()) + if (!Def) return false; + if (!Def->isMoveImmediate()) { + if (Def->isCopy()) { + // Sometimes extra copies are inserted from SGPRs to VGPRs to satisfy + // operand constraints. + return isImmOrMaterializedImm(MRI, Def->getOperand(1), Imm); + } + + return false; + } + const MachineOperand &Src = Def->getOperand(1); if (Src.isImm()) { Imm = Src.getImm(); Index: test/CodeGen/AMDGPU/lshr.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/lshr.v2i16.ll +++ test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -38,8 +38,7 @@ ; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; CI: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 16 ; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} -; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CI: v_cvt_pk_u16_u32 define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64