Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4851,37 +4851,39 @@ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) return false; - MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); - MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + DefinitionAndSourceRegister LHS = {MRI.getVRegDef(MI.getOperand(1).getReg()), + MI.getOperand(1).getReg()}; + DefinitionAndSourceRegister RHS = {MRI.getVRegDef(MI.getOperand(2).getReg()), + MI.getOperand(2).getReg()}; unsigned PreferredFusedOpcode = HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. - if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && - isContractableFMul(*RHS, AllowFusionGlobally)) { - if (hasMoreUses(*LHS, *RHS, MRI)) + if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) && + isContractableFMul(*RHS.MI, AllowFusionGlobally)) { + if (hasMoreUses(*LHS.MI, *RHS.MI, MRI)) std::swap(LHS, RHS); } // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (isContractableFMul(*LHS, AllowFusionGlobally) && - (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) { + if (isContractableFMul(*LHS.MI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), - RHS->getOperand(0).getReg()}); + {LHS.MI->getOperand(1).getReg(), + LHS.MI->getOperand(2).getReg(), RHS.Reg}); }; return true; } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) - if (isContractableFMul(*RHS, AllowFusionGlobally) && - (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) { + if (isContractableFMul(*RHS.MI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) { MatchInfo = [=, &MI](MachineIRBuilder &B) { B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, - {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(), - LHS->getOperand(0).getReg()}); + {RHS.MI->getOperand(1).getReg(), + RHS.MI->getOperand(2).getReg(), LHS.Reg}); }; return true; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -218,9 +218,8 @@ ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -240,9 +239,8 @@ ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX10-LABEL: name: test_add_mul_multiple_defs_z ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -262,9 +260,8 @@ ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -284,9 +281,8 @@ ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -325,9 +321,8 @@ ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -347,9 +342,8 @@ ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -369,9 +363,8 @@ ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -391,9 +384,8 @@ ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[TRUNC]] + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -149,11 +149,10 @@ ; GFX9-DENORM-LABEL: test_add_mul_multiple_defs_z: ; GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-DENORM-NEXT: v_mov_b32_e32 v4, v0 -; GFX9-DENORM-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-DENORM-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GFX9-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v4, v5 +; GFX9-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1 +; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v3 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z: @@ -188,11 +187,10 @@ ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-DENORM-NEXT: v_mov_b32_e32 v4, v0 -; GFX10-DENORM-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-DENORM-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GFX10-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) -; GFX10-DENORM-NEXT: v_mac_f32_e32 v0, v4, v5 +; GFX10-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v3 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z: @@ -233,11 +231,10 @@ ; GFX9-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-DENORM-NEXT: v_mov_b32_e32 v4, v0 -; GFX9-DENORM-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-DENORM-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GFX9-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v4, v5 +; GFX9-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1 +; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v3 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z: @@ -272,11 +269,10 @@ ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-DENORM-NEXT: v_mov_b32_e32 v4, v0 -; GFX10-DENORM-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-DENORM-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GFX10-DENORM-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) -; GFX10-DENORM-NEXT: v_mac_f32_e32 v0, v4, v5 +; GFX10-DENORM-NEXT: v_mac_f32_e32 v3, v0, v1 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v3 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z: