Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -562,7 +562,7 @@ // LSB mask for VGPR banks per VOPD component operand. // 4 banks result in a mask 3, setting 2 lower bits. -constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1}; +constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 3}; enum ComponentIndex : unsigned { X = 0, Y = 1 }; constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y}; Index: llvm/test/CodeGen/AMDGPU/vopd-combine.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/vopd-combine.mir +++ llvm/test/CodeGen/AMDGPU/vopd-combine.mir @@ -7,6 +7,7 @@ define void @vopd_schedule() { ret void } define void @vopd_fmamk() { ret void } define void @vopd_fmamk_fail() { ret void } + define void @vopd_fmamk_src2_step2() { ret void } define void @vopd_cndmask() { ret void } define void @vopd_mov() { ret void } define void @vopd_mov_mov() { ret void } @@ -89,21 +90,49 @@ ; SCHED: $vgpr1 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr2 = V_XOR_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = V_FMAMK_F32 killed $vgpr0, 10, killed $vgpr6, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + ; PAIR-LABEL: name: vopd_fmamk_fail + ; PAIR: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = V_XOR_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = V_FMAMK_F32 killed $vgpr0, 10, killed $vgpr6, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_XOR_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr6 = IMPLICIT_DEF + ; should not pair + $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec + $vgpr5 = V_FMAMK_F32 $vgpr0, 10, $vgpr6, implicit $mode, implicit $exec + +... + +--- +name: vopd_fmamk_src2_step2 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_fmamk_src2_step2 + ; SCHED: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_XOR_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr5 = V_FMAMK_F32 killed $vgpr0, 10, killed $vgpr4, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec - ; PAIR-LABEL: name: vopd_fmamk_fail + ; PAIR-LABEL: name: vopd_fmamk_src2_step2 ; PAIR: $vgpr1 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr2 = V_XOR_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF - ; PAIR-NEXT: $vgpr5 = V_FMAMK_F32 killed $vgpr0, 10, killed $vgpr4, implicit $mode, implicit $exec - ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr4, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = V_XOR_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec $vgpr4 = IMPLICIT_DEF - ; should not pair $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec $vgpr5 = V_FMAMK_F32 $vgpr0, 10, $vgpr4, implicit $mode, implicit $exec Index: llvm/test/MC/AMDGPU/gfx11_asm_vopd_err.s =================================================================== --- llvm/test/MC/AMDGPU/gfx11_asm_vopd_err.s +++ llvm/test/MC/AMDGPU/gfx11_asm_vopd_err.s @@ -278,9 +278,9 @@ // Src2 operands must use different VGPR banks. //===----------------------------------------------------------------------===// -v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v5 +v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v7 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks -// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v5 +// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v7 // GFX11-NEXT:{{^}} ^ v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v3 @@ -288,9 +288,9 @@ // GFX11-NEXT:{{^}}v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v3 // GFX11-NEXT:{{^}} ^ -v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3 +v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v7, v2, v5 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks -// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3 +// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v7, v2, v5 // GFX11-NEXT:{{^}} ^ //===----------------------------------------------------------------------===//