Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -148,7 +148,8 @@ std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true, - bool OpSel = false, bool ForceVGPR = false) const; + bool OpSel = false, bool ForceVGPR = false, + bool PeekSrc = false) const; InstructionSelector::ComplexRendererFns selectVCSRC(MachineOperand &Root) const; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3387,7 +3387,8 @@ } std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl( - MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const { + MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR, + bool PeekSrc) const { Register Src = Root.getReg(); Register OrigSrc = Src; unsigned Mods = 0; @@ -3407,7 +3408,7 @@ if (OpSel) Mods |= SISrcMods::OP_SEL_0; - if ((Mods != 0 || ForceVGPR) && + if (!PeekSrc && (Mods != 0 || ForceVGPR) && RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) { MachineInstr *UseMI = Root.getParent(); @@ -4798,10 +4799,15 @@ Register Src; unsigned Mods; - std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + // Peek at Src; in this mode selectVOP3ModsImpl creates no new instructions. + std::tie(Src, Mods) = selectVOP3ModsImpl(Root, true, false, false, true); MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); if (MI->getOpcode() == AMDGPU::G_FPEXT) { + // selectVOP3ModsImpl can create a copy, so call it only once the select is + // expected to succeed; this avoids creating a dead copy instruction. 
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); MachineOperand *MO = &MI->getOperand(1); Src = MO->getReg(); MI = getDefIgnoringCopies(Src, *MRI); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -128,6 +128,55 @@ ret float %b } +define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) { +; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX9-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX9-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: v_mad_f32 v0, |s0|, v0, v1 +; GFX9-DENORM-NEXT: ; return to shader part epilog +; +; GFX9-UNSAFE-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-UNSAFE-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX10-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX10-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-DENORM-NEXT: ; return to shader part epilog +; +; 
GFX10-UNSAFE-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-UNSAFE-NEXT: ; return to shader part epilog +.entry: + %abs_x = call contract float @llvm.fabs.f32(float %x) + %a = fmul contract float %abs_x, %y + %b = fadd contract float %a, %z + ret float %b +} + +declare float @llvm.fabs.f32(float) + define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry