diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -8550,7 +8550,7 @@ llvm_unreachable("Unhandled operand type in cvtVOPD"); }; - auto InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); + const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); // MCInst operands are ordered as follows: // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] @@ -8560,9 +8560,11 @@ } for (auto CompIdx : VOPD::COMPONENTS) { + const auto &CInfo = InstInfo[CompIdx]; + bool CompHasSrc2Acc = CInfo.hasSrc2Acc(); auto SrcOperandsNum = InstInfo[CompIdx].getSrcOperandsNum(); for (unsigned SrcIdx = 0; SrcIdx < SrcOperandsNum; ++SrcIdx) { - addOp(InstInfo[CompIdx].getParsedSrcIndex(SrcIdx)); + addOp(CInfo.getParsedSrcIndex(SrcIdx, CompHasSrc2Acc)); } } } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -549,18 +549,20 @@ // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] // Each ComponentKind has operand indices defined below. - static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 4 /* + OpXSrcNum */}; - static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {2, 2, - 5 /* + OpXSrcNum */}; + static constexpr unsigned PARSED_DST_IDX[] = {1, 1, + 4 /* + ParsedOpXSrcNum */}; + static constexpr unsigned FIRST_PARSED_SRC_IDX[] = { + 2, 2, 5 /* + ParsedOpXSrcNum */}; private: ComponentKind Kind; unsigned OpXSrcNum; + unsigned ParsedOpXSrcNum; public: - ComponentLayout(ComponentKind Kind_ = ComponentKind::SINGLE, - unsigned OpXSrcNum_ = 0) - : Kind(Kind_), OpXSrcNum(OpXSrcNum_) { + ComponentLayout(ComponentKind Kind = ComponentKind::SINGLE, + unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0) + : Kind(Kind), OpXSrcNum(OpXSrcNum), ParsedOpXSrcNum(ParsedOpXSrcNum) { assert(Kind <= ComponentKind::MAX); assert((Kind == ComponentKind::COMPONENT_Y) == (OpXSrcNum > 0)); } @@ -573,11 +575,15 @@ } unsigned getParsedDstIndex() const { - return PARSED_DST_IDX[Kind] + OpXSrcNum; + return PARSED_DST_IDX[Kind] + ParsedOpXSrcNum; } - unsigned getParsedSrcIndex(unsigned SrcIdx) const { + unsigned getParsedSrcIndex(unsigned SrcIdx, bool ComponentHasSrc2Acc) const { assert(SrcIdx < Component::MAX_SRC_NUM); - return FIRST_PARSED_SRC_IDX[Kind] + OpXSrcNum + SrcIdx; + // FMAC and DOT2C have a src2 operand on the MCInst but + // not on the asm representation. src2 is tied to dst. + if (ComponentHasSrc2Acc && SrcIdx == (MAX_SRC_NUM - 1)) + return getParsedDstIndex(); + return FIRST_PARSED_SRC_IDX[Kind] + ParsedOpXSrcNum + SrcIdx; } }; @@ -616,8 +622,9 @@ public: ComponentInfo(const MCInstrDesc &OpDesc, ComponentKind Kind = ComponentKind::SINGLE, - unsigned OpXSrcNum = 0) - : ComponentLayout(Kind, OpXSrcNum), ComponentProps(OpDesc) {} + unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0) + : ComponentLayout(Kind, OpXSrcNum, ParsedOpXSrcNum), + ComponentProps(OpDesc) {} // Map MC operand index to parsed operand index. // Return 0 if the specified operand does not exist. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -480,10 +480,10 @@ assert(TiedIdx == -1 || TiedIdx == Component::DST); HasSrc2Acc = TiedIdx != -1; - SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs() - HasSrc2Acc; + SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs(); assert(SrcOperandsNum <= Component::MAX_SRC_NUM); - auto OperandsNum = OpDesc.getNumOperands() - HasSrc2Acc; + auto OperandsNum = OpDesc.getNumOperands(); for (unsigned OprIdx = Component::SRC1; OprIdx < OperandsNum; ++OprIdx) { if (OpDesc.OpInfo[OprIdx].OperandType == AMDGPU::OPERAND_KIMM32) { MandatoryLiteralIdx = OprIdx; @@ -500,7 +500,7 @@ auto SrcIdx = OprIdx - Component::DST_NUM; if (SrcIdx < getSrcOperandsNum()) - return getParsedSrcIndex(SrcIdx); + return getParsedSrcIndex(SrcIdx, hasSrc2Acc()); // The specified operand does not exist. return 0; @@ -539,8 +539,6 @@ unsigned Src2Reg = 0; if (Comp.hasRegularSrcOperand(2)) Src2Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(2)); - else if (Comp.hasSrc2Acc()) - Src2Reg = DstReg; return {DstReg, Src0Reg, Src1Reg, Src2Reg}; } @@ -557,8 +555,9 @@ const auto &OpXDesc = InstrInfo->get(OpX); const auto &OpYDesc = InstrInfo->get(OpY); VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); - VOPD::ComponentInfo OpYInfo(OpYDesc, VOPD::ComponentKind::COMPONENT_Y, - OpXInfo.getSrcOperandsNum()); + VOPD::ComponentInfo OpYInfo( + OpYDesc, VOPD::ComponentKind::COMPONENT_Y, OpXInfo.getSrcOperandsNum(), + OpXInfo.getSrcOperandsNum() - OpXInfo.hasSrc2Acc()); return VOPD::InstInfo(OpXInfo, OpYInfo); } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -427,6 +427,15 @@ let InsVOP3Base = getIns64, 3, 0, HasModifiers, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; + // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu + let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); + let InsVOPDXDeferred = + (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, + VGPR_32:$vsrc1X, VGPRSrc_32:$src2X); + let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y); + let InsVOPDYDeferred = + (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, + VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y); let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td @@ -76,6 +76,21 @@ let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg); let mayRaiseFPException = ReadsModeReg; + // V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for + // passes to track its uses. Its presence does not affect VOPD formation rules + // because the rules for src2 and dst are the same. src2X and src2Y should not + // be encoded. + bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16")); + bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16")); + string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", ""); + string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", ""); + let Constraints = + ConstraintsX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # ConstraintsY; + string DisableEncodingX = !if(hasSrc2AccX, "$src2X", ""); + string DisableEncodingY = !if(hasSrc2AccY, "$src2Y", ""); + let DisableEncoding = + DisableEncodingX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # DisableEncodingY; + let Uses = RegListUnion.ret; let Defs = RegListUnion.ret; let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW); diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir --- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir +++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir @@ -68,7 +68,7 @@ ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF - ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = IMPLICIT_DEF @@ -133,7 +133,7 @@ ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-NEXT: $sgpr20 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec - ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc ; PAIR-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec ; PAIR-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc @@ -246,7 +246,7 @@ ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF - ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = IMPLICIT_DEF @@ -276,7 +276,7 @@ ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF - ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = IMPLICIT_DEF @@ -447,7 +447,7 @@ ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec ; PAIR-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc ; PAIR-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec ; PAIR-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir +++ /dev/null @@ -1,28 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s - -# FIXME: Second VOPD pair reads vgpr0 and vgpr1 written by first pair, so there -# should be a delay. ---- -name: vopd_fmac_fmac -tracksRegLiveness: true -body: | - bb.0: - ; CHECK-LABEL: name: vopd_fmac_fmac - ; CHECK: $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF - ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF - ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF - ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF - ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = IMPLICIT_DEF - $vgpr3 = IMPLICIT_DEF - $vgpr4 = IMPLICIT_DEF - $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec - $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec - $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec - $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec -... diff --git a/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir @@ -0,0 +1,51 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s + +--- +name: vopd_fmac_fmac +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: vopd_fmac_fmac + ; CHECK: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU 1 + ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec + $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec + $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec + $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec +... +--- +name: vopd_dot2c_dot2c +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: vopd_dot2c_dot2c + ; CHECK: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU 1 + ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec + $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec + $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec + $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec +... diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s @@ -266,4 +266,4 @@ v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3 // GFX11: error: src2 operands must use different VGPR banks // GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3 -// GFX11-NEXT:{{^}} ^ +// GFX11-NEXT:{{^}} ^