diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -123,6 +123,13 @@
     LLVM_DEBUG(dbgs() << "  Inst hasn't e32 equivalent\n");
     return false;
   }
+  if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+    // Give up if there are any uses of the carry-out from instructions like
+    // V_ADD_CO_U32. The shrunken form of the instruction would write it to vcc
+    // instead of to a virtual register.
+    if (!MRI->use_nodbg_empty(SDst->getReg()))
+      return false;
+  }
   // check if other than abs|neg modifiers are set (opsel for example)
   const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
   if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -354,6 +354,26 @@
     %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
 ...
 
+# GCN-LABEL: name: add_co_u32_e64
+# GCN: %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec
+
+name: add_co_u32_e64
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+
+    ; this shouldn't be combined as the carry-out is used
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec
+
+    S_NOP 0, implicit %5
+...
+
 # tests on sequences of dpp consumers
 # GCN-LABEL: name: dpp_seq
 # GCN: %4:vgpr_32 = V_ADD_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec