diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -148,7 +148,11 @@
   std::pair<Register, unsigned>
   selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true,
-                     bool OpSel = false, bool ForceVGPR = false) const;
+                     bool OpSel = false) const;
+
+  Register copyToVGPRIfSrcFolded(Register Src, unsigned Mods,
+                                 MachineOperand Root, MachineInstr *InsertPt,
+                                 bool ForceVGPR = false) const;
 
   InstructionSelector::ComplexRendererFns
   selectVCSRC(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -560,11 +560,11 @@
   MachineInstr *MixInst =
       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpC), Dst)
           .addImm(Src0Mods)
-          .addReg(Src0)
+          .addReg(copyToVGPRIfSrcFolded(Src0, Src0Mods, I.getOperand(1), &I))
           .addImm(Src1Mods)
-          .addReg(Src1)
+          .addReg(copyToVGPRIfSrcFolded(Src1, Src1Mods, I.getOperand(2), &I))
           .addImm(Src2Mods)
-          .addReg(Src2)
+          .addReg(copyToVGPRIfSrcFolded(Src2, Src2Mods, I.getOperand(3), &I))
           .addImm(0)
           .addImm(0)
           .addImm(0);
@@ -3410,9 +3410,8 @@
 }
 
 std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
-    MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const {
+    MachineOperand &Root, bool AllowAbs, bool OpSel) const {
   Register Src = Root.getReg();
-  Register OrigSrc = Src;
   unsigned Mods = 0;
   MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
@@ -3430,21 +3429,26 @@
   if (OpSel)
     Mods |= SISrcMods::OP_SEL_0;
 
+  return std::make_pair(Src, Mods);
+}
+
+Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
+    Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
+    bool ForceVGPR) const {
   if ((Mods != 0 || ForceVGPR) &&
       RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
-    MachineInstr *UseMI = Root.getParent();
     // If we looked through copies to find source modifiers on an SGPR operand,
     // we now have an SGPR register source. To avoid potentially violating the
     // constant bus restriction, we need to insert a copy to a VGPR.
-    Register VGPRSrc = MRI->cloneVirtualRegister(OrigSrc);
-    BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
+    Register VGPRSrc = MRI->cloneVirtualRegister(Root.getReg());
+    BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
             TII.get(AMDGPU::COPY), VGPRSrc)
-      .addReg(Src);
+        .addReg(Src);
     Src = VGPRSrc;
   }
 
-  return std::make_pair(Src, Mods);
+  return Src;
 }
 
 ///
@@ -3464,7 +3468,9 @@
   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
 
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+      },
       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
@@ -3478,7 +3484,9 @@
   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
 
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+      },
       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
@@ -3501,8 +3509,10 @@
   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
 
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+      },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }  // src_mods
   }};
 }
@@ -3513,7 +3523,9 @@
   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
 
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+      },
       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
   }};
 }
@@ -3621,8 +3633,10 @@
     return None;
 
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+      },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }  // src_mods
   }};
 }
@@ -3645,11 +3659,13 @@
   unsigned Mods;
   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false,
-                                           /* OpSel */ false,
-                                           /* ForceVGPR */ true);
+                                           /* OpSel */ false);
 
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(
+            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
+      },
       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
   }};
 }
@@ -3660,11 +3676,13 @@
   unsigned Mods;
   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false,
-                                           /* OpSel */ true,
-                                           /* ForceVGPR */ true);
+                                           /* OpSel */ true);
 
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(
+            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
+      },
       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
   }};
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -1037,6 +1037,37 @@
   ret float %fma
 }
 
+define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) {
+; GFX6-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX6:       ; %bb.0: ; %.entry
+; GFX6-NEXT:    v_fma_f32 v0, |s0|, v0, v1
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX8:       ; %bb.0: ; %.entry
+; GFX8-NEXT:    v_fma_f32 v0, |s0|, v0, v1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX9:       ; %bb.0: ; %.entry
+; GFX9-NEXT:    v_fma_f32 v0, |s0|, v0, v1
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX10:       ; %bb.0: ; %.entry
+; GFX10-NEXT:    v_fma_f32 v0, |s0|, v0, v1
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: dont_crash_after_fma_mix_select_attempt:
+; GFX11:       ; %bb.0: ; %.entry
+; GFX11-NEXT:    v_fma_f32 v0, |s0|, v0, v1
+; GFX11-NEXT:    ; return to shader part epilog
+.entry:
+  %fabs.x = call contract float @llvm.fabs.f32(float %x)
+  %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
+  ret float %fma
+}
+
 declare half @llvm.fma.f16(half, half, half) #0
 declare float @llvm.fma.f32(float, float, float) #0
 declare double @llvm.fma.f64(double, double, double) #0
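
Note on the shape of the change, for reviewers: selectVOP3ModsImpl is now a pure query that only reports the folded source register and modifier bits, while the SGPR-to-VGPR copy it used to emit eagerly is deferred to the new copyToVGPRIfSrcFolded, which the renderer lambdas call at the point where the destination instruction is actually built. The dont_crash_after_fma_mix_select_attempt test above exercises the case where a mix selection is attempted with a folded SGPR modifier and then abandoned. A caller of the split API would look roughly like the sketch below; selectVOP3ModsExample is a hypothetical name used purely for illustration (it mirrors the selectVOP3* renderers updated in this patch and is not itself part of the change):

  // Sketch only: a hypothetical renderer showing the intended split between
  // modifier folding (no MIR changes) and copy insertion (at render time).
  InstructionSelector::ComplexRendererFns
  AMDGPUInstructionSelector::selectVOP3ModsExample(MachineOperand &Root) const {
    Register Src;
    unsigned Mods;
    // Step 1: fold neg/abs into Mods; this no longer creates any instructions.
    std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

    return {{
        // Step 2: insert the SGPR->VGPR copy, if one is needed, only when the
        // renderer fires, i.e. when the target instruction is really emitted.
        [=](MachineInstrBuilder &MIB) {
          MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
        },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src_mods
    }};
  }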