Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -146,6 +146,9 @@ bool selectSMFMACIntrin(MachineInstr &I) const; bool selectWaveAddress(MachineInstr &I) const; + std::pair<Register, unsigned> getVOP3ModsImpl(MachineOperand &Root, + bool AllowAbs = true, + bool OpSel = false) const; std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true, bool OpSel = false, bool ForceVGPR = false) const; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -533,6 +533,23 @@ (IsFMA ? !Subtarget->hasFmaMixInsts() : !Subtarget->hasMadMixInsts())) return false; + Register Src; + unsigned Mods; + bool HasFpext = false; + + for (unsigned i = 1; i <= 3; ++i) { + std::tie(Src, Mods) = getVOP3ModsImpl(I.getOperand(i)); + MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); + if (MI->getOpcode() == AMDGPU::G_FPEXT) { + HasFpext = true; + break; + } + } + // TODO: We can select this with f32 denormals enabled if all the sources are + // converted from f16 (in which case fmad isn't legal). + if (!HasFpext) + return false; + // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand // using the conversion from f16. bool MatchedSrc0, MatchedSrc1, MatchedSrc2; @@ -551,11 +568,6 @@ "fmad selected with denormals enabled"); #endif - // TODO: We can select this with f32 denormals enabled if all the sources are - // converted from f16 (in which case fmad isn't legal). - if (!MatchedSrc0 && !MatchedSrc1 && !MatchedSrc2) - return false; - const unsigned OpC = IsFMA ? 
AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32; MachineInstr *MixInst = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpC), Dst) @@ -3386,10 +3398,10 @@ } -std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl( - MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const { +std::pair<Register, unsigned> +AMDGPUInstructionSelector::getVOP3ModsImpl(MachineOperand &Root, bool AllowAbs, + bool OpSel) const { Register Src = Root.getReg(); - Register OrigSrc = Src; unsigned Mods = 0; MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); @@ -3407,6 +3419,15 @@ if (OpSel) Mods |= SISrcMods::OP_SEL_0; + return std::make_pair(Src, Mods); +} + +std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl( + MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const { + Register OrigSrc = Root.getReg(); + Register Src; + unsigned Mods; + std::tie(Src, Mods) = getVOP3ModsImpl(Root, AllowAbs, OpSel); if ((Mods != 0 || ForceVGPR) && RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) { MachineInstr *UseMI = Root.getParent(); @@ -3417,7 +3438,7 @@ Register VGPRSrc = MRI->cloneVirtualRegister(OrigSrc); BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(), TII.get(AMDGPU::COPY), VGPRSrc) - .addReg(Src); + .addReg(Src); Src = VGPRSrc; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -128,6 +128,55 @@ ret float %b } +define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) { +; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX9-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; 
GFX9-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX9-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: v_mad_f32 v0, |s0|, v0, v1 +; GFX9-DENORM-NEXT: ; return to shader part epilog +; +; GFX9-UNSAFE-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-UNSAFE-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX10-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX10-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-DENORM-NEXT: ; return to shader part epilog +; +; GFX10-UNSAFE-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-UNSAFE-NEXT: ; return to shader part epilog +.entry: + %abs_x = call contract float @llvm.fabs.f32(float %x) + %a = fmul contract float %abs_x, %y + %b = fadd contract float %a, %z + ret float %b +} + +declare float @llvm.fabs.f32(float) + define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry