Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -146,6 +146,9 @@ bool selectSMFMACIntrin(MachineInstr &I) const; bool selectWaveAddress(MachineInstr &I) const; + std::pair<Register, unsigned> getVOP3ModsImpl(MachineOperand &Root, + bool AllowAbs = true, + bool OpSel = false) const; std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true, bool OpSel = false, bool ForceVGPR = false) const; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3386,10 +3386,10 @@ } -std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl( - MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const { +std::pair<Register, unsigned> +AMDGPUInstructionSelector::getVOP3ModsImpl(MachineOperand &Root, bool AllowAbs, + bool OpSel) const { Register Src = Root.getReg(); - Register OrigSrc = Src; unsigned Mods = 0; MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); @@ -3407,6 +3407,16 @@ if (OpSel) Mods |= SISrcMods::OP_SEL_0; + return std::make_pair(Src, Mods); +} + +std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl( + MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const { + Register OrigSrc = Root.getReg(); + Register Src; + unsigned Mods; + std::tie(Src, Mods) = getVOP3ModsImpl(Root, AllowAbs, OpSel); + if ((Mods != 0 || ForceVGPR) && RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) { MachineInstr *UseMI = Root.getParent(); @@ -3417,7 +3427,7 @@ Register VGPRSrc = MRI->cloneVirtualRegister(OrigSrc); BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(), TII.get(AMDGPU::COPY), VGPRSrc) - .addReg(Src); + .addReg(Src); Src = VGPRSrc; } @@ -4798,10 +4808,14 @@ Register Src; unsigned 
Mods; - std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + std::tie(Src, Mods) = getVOP3ModsImpl(Root); MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); if (MI->getOpcode() == AMDGPU::G_FPEXT) { + // selectVOP3ModsImpl may emit a COPY, so call it only after the G_FPEXT + // check passes; otherwise a dead COPY instruction could be left behind. + std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + MI = getDefIgnoringCopies(Src, *MRI); MachineOperand *MO = &MI->getOperand(1); Src = MO->getReg(); MI = getDefIgnoringCopies(Src, *MRI); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -128,6 +128,55 @@ ret float %b } +define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) { +; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX9-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX9-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: v_mad_f32 v0, |s0|, v0, v1 +; GFX9-DENORM-NEXT: ; return to shader part epilog +; +; GFX9-UNSAFE-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-UNSAFE-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX10-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; 
GFX10-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX10-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-DENORM-NEXT: ; return to shader part epilog +; +; GFX10-UNSAFE-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-UNSAFE-NEXT: ; return to shader part epilog +.entry: + %abs_x = call contract float @llvm.fabs.f32(float %x) + %a = fmul contract float %abs_x, %y + %b = fadd contract float %a, %z + ret float %b +} + +declare float @llvm.fabs.f32(float) + define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry