Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -148,7 +148,10 @@ std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true, - bool OpSel = false, bool ForceVGPR = false) const; + bool OpSel = false) const; + + Register copyToVGPR(Register Src, unsigned Mods, MachineOperand Root, + bool ForceVGPR = false) const; InstructionSelector::ComplexRendererFns selectVCSRC(MachineOperand &Root) const; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -560,11 +560,11 @@ MachineInstr *MixInst = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpC), Dst) .addImm(Src0Mods) - .addReg(Src0) + .addReg(copyToVGPR(Src0, Src0Mods, I.getOperand(1))) .addImm(Src1Mods) - .addReg(Src1) + .addReg(copyToVGPR(Src1, Src1Mods, I.getOperand(2))) .addImm(Src2Mods) - .addReg(Src2) + .addReg(copyToVGPR(Src2, Src2Mods, I.getOperand(3))) .addImm(0) .addImm(0) .addImm(0); @@ -3387,9 +3387,8 @@ } std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl( - MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const { + MachineOperand &Root, bool AllowAbs, bool OpSel) const { Register Src = Root.getReg(); - Register OrigSrc = Src; unsigned Mods = 0; MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); @@ -3407,6 +3406,12 @@ if (OpSel) Mods |= SISrcMods::OP_SEL_0; + return std::make_pair(Src, Mods); +} + +Register AMDGPUInstructionSelector::copyToVGPR(Register Src, unsigned Mods, + MachineOperand Root, + bool ForceVGPR) const { if ((Mods != 0 || ForceVGPR) && RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) { MachineInstr *UseMI = Root.getParent(); @@ -3414,14 +3419,14 @@ // If we
looked through copies to find source modifiers on an SGPR operand, // we now have an SGPR register source. To avoid potentially violating the // constant bus restriction, we need to insert a copy to a VGPR. - Register VGPRSrc = MRI->cloneVirtualRegister(OrigSrc); + Register VGPRSrc = MRI->cloneVirtualRegister(Root.getReg()); BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(), TII.get(AMDGPU::COPY), VGPRSrc) - .addReg(Src); + .addReg(Src); Src = VGPRSrc; } - return std::make_pair(Src, Mods); + return Src; } /// @@ -3439,6 +3444,7 @@ Register Src; unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + Src = copyToVGPR(Src, Mods, Root); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -3453,6 +3459,7 @@ Register Src; unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false); + Src = copyToVGPR(Src, Mods, Root); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -3476,6 +3483,7 @@ Register Src; unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + Src = copyToVGPR(Src, Mods, Root); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -3488,6 +3496,7 @@ Register Src; unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false); + Src = copyToVGPR(Src, Mods, Root); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -3597,6 +3606,7 @@ if (!isKnownNeverNaN(Src, *MRI)) return None; + Src = copyToVGPR(Src, Mods, Root); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods @@ -3618,8 +3628,8 @@ unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false, - /* OpSel */ false, - /* ForceVGPR */ true); + /* OpSel */ false); + Src = copyToVGPR(Src, Mods, Root, /* ForceVGPR */ true); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -3633,8 +3643,8 @@ unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false, - /* 
OpSel */ true, - /* ForceVGPR */ true); + /* OpSel */ true); + Src = copyToVGPR(Src, Mods, Root, /* ForceVGPR */ true); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -128,6 +128,55 @@ ret float %b } +define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) { +; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX9-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX9-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: v_mad_f32 v0, |s0|, v0, v1 +; GFX9-DENORM-NEXT: ; return to shader part epilog +; +; GFX9-UNSAFE-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX9-UNSAFE-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX10-CONTRACT-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-CONTRACT-NEXT: ; return to shader part epilog +; +; GFX10-DENORM-LABEL: dont_crash_after_fma_mix_select_attempt: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-DENORM-NEXT: ; return to shader part epilog +; +; GFX10-UNSAFE-LABEL: 
dont_crash_after_fma_mix_select_attempt: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, |s0|, v0, v1 +; GFX10-UNSAFE-NEXT: ; return to shader part epilog +.entry: + %abs_x = call contract float @llvm.fabs.f32(float %x) + %a = fmul contract float %abs_x, %y + %b = fadd contract float %a, %z + ret float %b +} + +declare float @llvm.fabs.f32(float) + define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry