diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -353,6 +353,12 @@
   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
   assert(DstOpnd && DstOpnd->isReg());
   auto DPPMovReg = DstOpnd->getReg();
+  // The checks that follow ask MRI about the uses of the mov's result, so
+  // give up early when the mov writes a physical register.
+  if (DPPMovReg.isPhysical()) {
+    LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n");
+    return false;
+  }
   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
     LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
                          " for all uses\n");
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -563,6 +563,18 @@
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $exec
 ...
 
+# Do not combine a dpp mov which writes a physreg.
+# GCN-LABEL: name: phys_dpp_mov_dst
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $exec
+name: phys_dpp_mov_dst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $exec
+...
+
 # GCN-LABEL: name: dpp_reg_sequence_both_combined
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3