diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1769,21 +1769,11 @@ void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } - void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, - bool IsDPP8 = false); - void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { - cvtVOPCNoDstDPP(Inst, Operands, true); - } void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { cvtVOP3DPP(Inst, Operands, true); } - void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, - bool IsDPP8 = false); - void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { - cvtVOPC64NoDstDPP(Inst, Operands, true); - } OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, AMDGPUOperand::ImmTy Type); @@ -8769,14 +8759,6 @@ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); } -// Add dummy $old operand -void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, - const OperandVector &Operands, - bool IsDPP8) { - Inst.addOperand(MCOperand::createReg(0)); - cvtVOP3DPP(Inst, Operands, IsDPP8); -} - void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap OptionalIdx; unsigned Opc = Inst.getOpcode(); @@ -8845,14 +8827,6 @@ } } -// Add dummy $old operand -void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, - const OperandVector &Operands, - bool IsDPP8) { - Inst.addOperand(MCOperand::createReg(0)); - cvtDPP(Inst, Operands, IsDPP8); -} - void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap OptionalIdx; diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -222,6 +222,7 @@ // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst } + int OrigOpE32 = AMDGPU::getVOPe32(OrigOp); const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old); if (OldIdx != -1) { assert(OldIdx == NumOperands); @@ -234,6 +235,10 @@ DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef, CombOldVGPR.SubReg); ++NumOperands; + } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && + TII->isVOPC(OrigOpE32))) { + // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand + // because they write to SGPRs not VGPRs } else { // TODO: this discards MAC/FMA instructions for now, let's add it later LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction," diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -627,7 +627,7 @@ bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc, unsigned OpNo) const { - return OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && + return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) && (Desc.TSFlags & SIInstrFlags::VOPC) && (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)); @@ -644,8 +644,7 @@ // If there are printed modifiers, printOperandAndFPInputMods or // printOperandAndIntInputMods will be called instead if ((OpNo == 0 || - (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP)) || - (OpNo == 2 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) && + (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) && (Desc.TSFlags & SIInstrFlags::VOPC) && (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO))) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5018,10 +5018,8 @@ } if (MO->isReg()) { - if (!DefinedRC) { - // This operand allows any register. - return true; - } + if (!DefinedRC) + return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN; if (!isLegalRegOperand(MRI, OpInfo, *MO)) return false; bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1903,94 +1903,94 @@ class getInsDPPBase { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> { - dag ret = !if (!eq(NumSrcArgs, 0), + dag ret = !if(!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) (ins ), - !if (!eq(NumSrcArgs, 1), - !if (HasModifiers, - // VOP1_DPP with modifiers - (ins OldRC:$old, Src0Mod:$src0_modifiers, - Src0RC:$src0) - /* else */, - // VOP1_DPP without modifiers - (ins OldRC:$old, Src0RC:$src0) - /* endif */), - !if (!eq(NumSrcArgs, 2), - !if (HasModifiers, - // VOP2_DPP with modifiers - (ins OldRC:$old, - Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1) - /* else */, - // VOP2_DPP without modifiers - (ins OldRC:$old, - Src0RC:$src0, Src1RC:$src1) - ) - /* NumSrcArgs == 3, VOP3 */, - !if (HasModifiers, - // VOP3_DPP with modifiers - (ins OldRC:$old, - Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - Src2Mod:$src2_modifiers, Src2RC:$src2) - /* else */, - // VOP3_DPP without modifiers - (ins OldRC:$old, - Src0RC:$src0, Src1RC:$src1, - Src2RC:$src2) + !con( + !if(HasOld ,(ins OldRC:$old), (ins)), + !if (!eq(NumSrcArgs, 1), + !if (HasModifiers, + // VOP1_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0) + /* else */, + // VOP1_DPP without modifiers + (ins Src0RC:$src0) + /* endif */), + !if (!eq(NumSrcArgs, 2), + !if (HasModifiers, + // VOP2_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1) + /* else */, + // VOP2_DPP without modifiers + (ins Src0RC:$src0, Src1RC:$src1) + ) + /* NumSrcArgs == 3, VOP3 */, + !if (HasModifiers, + // VOP3_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2) + /* else */, + // VOP3_DPP without modifiers + (ins Src0RC:$src0, Src1RC:$src1, + Src2RC:$src2) + ) + ) + ) ) - /* endif */))); + ); } class getInsDPP { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPPBase.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); } class getInsDPP16 { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPP.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins FI:$fi)); } class getInsDPP8 { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPPBase.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins dpp8:$dpp8, FI:$fi)); } -class getInsVOP3DPPBase { +class getInsVOP3DPPBase { dag old = ( ins OldRC:$old ); dag base = VOP3Base; dag ret = !con( - !if(!ne(NumSrcArgs, 0), old, (ins)), + !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)), base ); } -class getInsVOP3DPP { - dag ret = !con(getInsVOP3DPPBase.ret, +class getInsVOP3DPP { + dag ret = !con(getInsVOP3DPPBase.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); } -class getInsVOP3DPP16 { - dag ret = !con(getInsVOP3DPP.ret, +class getInsVOP3DPP16 { + dag ret = !con(getInsVOP3DPP.ret, (ins FI:$fi)); } -class getInsVOP3DPP8 { - dag ret = !con(getInsVOP3DPPBase.ret, +class getInsVOP3DPP8 { + dag ret = !con(getInsVOP3DPPBase.ret, (ins dpp8:$dpp8, FI:$fi)); } diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -59,15 +59,17 @@ "$src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"); let AsmDPP8 = "$src0, $src1 $dpp8$fi"; let AsmDPP16 = AsmDPP#"$fi"; + // VOPC DPP Instructions do not need an old operand + let TieRegDPP = ""; let InsDPP = getInsDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; let InsDPP16 = getInsDPP16, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; let InsDPP8 = getInsDPP8, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; // The destination for 32-bit encoding is implicit. let HasDst32 = 0; @@ -76,9 +78,9 @@ let Outs64 = (outs VOPDstS64orS32:$sdst); let OutsVOP3DPP = Outs64; let OutsVOP3DPP8 = Outs64; - let InsVOP3DPP = getInsVOP3DPP.ret; - let InsVOP3DPP16 = getInsVOP3DPP16.ret; - let InsVOP3DPP8 = getInsVOP3DPP8.ret; + let InsVOP3DPP = getInsVOP3DPP.ret; + let InsVOP3DPP16 = getInsVOP3DPP16.ret; + let InsVOP3DPP8 = getInsVOP3DPP8.ret; list Schedule = sched; } @@ -293,7 +295,7 @@ let Defs = !if(DefExec, [EXEC], []); let SchedRW = P.Schedule; let isCompare = 1; - let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $sdst", ""); + let Constraints = ""; } } // end SubtargetPredicate = isGFX11Plus @@ -711,7 +713,7 @@ VOPC_Profile { let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP16 = AsmDPP#"$fi"; - let InsDPP = (ins VGPR_32:$old, FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP = (ins FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); // DPP8 forbids modifiers and can inherit from VOPC_Profile @@ -793,7 +795,7 @@ def _e64_dpp : VOP3_DPP_Pseudo { let Defs = !if(DefExec, [EXEC], []); let SchedRW = p.Schedule; - let Constraints = !if(p.NumSrcArgs, p.TieRegDPP # " = $sdst", ""); + let Constraints = ""; } } // end SubtargetPredicate = isGFX11Plus } @@ -1068,7 +1070,6 @@ let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; let Constraints = ps.Constraints; - let AsmMatchConverter = "cvtVOPCNoDstDPP"; } class VOPC_DPP16_SIMC op, VOP_DPP_Pseudo ps, int subtarget, @@ -1084,7 +1085,6 @@ let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; let Constraints = ""; - let AsmMatchConverter = "cvtVOPCNoDstDPP8"; } // VOPC64 @@ -1133,7 +1133,6 @@ string opName = ps.OpName> : VOPC64_DPP16 { let Inst{7-0} = ? ; - let AsmMatchConverter = "cvtVOPC64NoDstDPP"; } class VOPC64_DPP8_Base op, string OpName, VOPProfile P> @@ -1163,13 +1162,12 @@ : VOPC64_DPP8 { bits<8> sdst; let Inst{7-0} = sdst; - let Constraints = "$old = $sdst"; + let Constraints = ""; } class VOPC64_DPP8_NoDst op, VOP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP8 { let Inst{7-0} = ? ; - let AsmMatchConverter = "cvtVOPC64NoDstDPP8"; let Constraints = ""; } diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.ll b/llvm/test/CodeGen/AMDGPU/vopc_dpp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s + +define amdgpu_cs void @_amdgpu_cs_main(i32 %0) { +; GFX11-LABEL: _amdgpu_cs_main: +; GFX11: v_cmp_eq_u32_e64_dpp s1, v1, v0 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 +.entry: + %1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 0, i32 0, i32 15, i32 15, i1 false) + %2 = icmp ne i32 %1, %0 + %spec.select.3 = select i1 %2, i32 0, i32 1 + call void @llvm.amdgcn.raw.buffer.store.i32(i32 %spec.select.3, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0) + ret void +} + +declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) +declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir --- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir @@ -11,22 +11,23 @@ ; GCN-LABEL: name: vopc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: V_CMP_LT_F32_e32_dpp [[DEF]], 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec - ; GCN: V_CMPX_EQ_I16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN: V_CMP_CLASS_F16_e32_dpp [[DEF]], 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec - ; GCN: [[V_CMP_GE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64_dpp [[DEF]], 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec - ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec - ; GCN: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec - ; GCN: V_CMP_CLASS_F32_e32_dpp [[DEF]], 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec - ; GCN: V_CMP_NGE_F16_e32_dpp [[DEF]], 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec - ; GCN: [[V_CMP_NGE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_e64_dpp [[DEF]], 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec - ; GCN: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F16_e64_dpp]], 10101, implicit-def $scc - ; GCN: V_CMP_GT_I32_e32_dpp [[DEF]], [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: V_CMPX_EQ_I16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: V_CMP_CLASS_F16_e32_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec + ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec + ; GCN-NEXT: V_CMP_NGE_F16_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F16_e64_dpp]], 10101, implicit-def $scc + ; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 %2:vgpr_32 = COPY $vgpr2