diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -202,6 +202,19 @@
     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
     return nullptr;
   }
 
+  int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
+  // Prior checks happen to cover the VOPC-with-partial-mask case, but only
+  // incidentally, not by design; assert the invariant explicitly here.
+  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
+  assert(RowMaskOpnd && RowMaskOpnd->isImm());
+  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
+  assert(BankMaskOpnd && BankMaskOpnd->isImm());
+  const bool MaskAllLanes =
+      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
+  assert((MaskAllLanes ||
+          !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
+                                   TII->isVOPC(OrigOpE32)))) &&
+         "VOPC cannot form DPP unless mask is full");
   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, OrigMI.getDebugLoc(),
                          TII->get(DPPOp))
@@ -222,7 +235,6 @@
     // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
   }
 
-  int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
   const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
   if (OldIdx != -1) {
     assert(OldIdx == NumOperands);
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -67,3 +67,36 @@
     V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec
 
 ...
+---
+
+name: mask_not_full
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: mask_not_full
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
+    ; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    ; Do not combine VOPC when row_mask or bank_mask is not 0xf;
+    ; such cases are already covered by the generic DPP-formation rules.
+    %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
+    V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
+
+    %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
+    %6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
+
+...
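
For the intuition behind the MaskAllLanes check: row_mask and bank_mask are 4-bit
write-enable masks. In a wave64, DPP groups the 64 lanes into 4 rows of 16 lanes,
and each row into 4 banks of 4 lanes; a lane whose row bit or bank bit is clear
keeps the previous destination value ("old"). VOPC instructions write per-lane
condition bits and carry no old operand, so a partially masked DPP mov cannot be
folded into them. The following is a minimal standalone sketch of that lane-enable
rule; laneWriteEnabled is a hypothetical helper for illustration, not part of the
patch or of the LLVM API.

#include <cassert>

// Model of DPP write-enable semantics (wave64): lanes are grouped into
// 4 rows of 16, and each row into 4 banks of 4 lanes. row_mask bit R
// enables row R; bank_mask bit B enables bank B. A masked-off lane keeps
// the previous ("old") destination value.
static bool laneWriteEnabled(unsigned Lane, unsigned RowMask,
                             unsigned BankMask) {
  unsigned Row = Lane / 16;        // which of the 4 rows
  unsigned Bank = (Lane % 16) / 4; // which of the 4 banks within the row
  return ((RowMask >> Row) & 1) && ((BankMask >> Bank) & 1);
}

int main() {
  // With full masks (0xF/0xF) every lane is written, so no 'old' value is
  // needed and the DPP mov can be folded into a VOPC.
  for (unsigned Lane = 0; Lane != 64; ++Lane)
    assert(laneWriteEnabled(Lane, 0xF, 0xF));

  // With bank_mask = 0xE (as in the first new test case), bank 0 of every
  // row keeps 'old' -- which VOPC, having no 'old' operand, cannot model.
  assert(!laneWriteEnabled(/*Lane=*/0, /*RowMask=*/0xF, /*BankMask=*/0xE));
  return 0;
}

The two V_MOV_B32_dpp instructions in the new test (row_mask 15 with bank_mask 14,
and row_mask 13 with bank_mask 15) each leave some lanes masked off, which is
exactly the situation the new assert guards against.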