diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -15,10 +15,10 @@
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "GCNHazardRecognizer.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -28,6 +28,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -2841,6 +2842,18 @@
   return AMDGPU::NoRegister;
 }
 
+static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
+                                MachineInstr &NewMI) {
+  if (LV) {
+    unsigned NumOps = MI.getNumOperands();
+    for (unsigned I = 1; I < NumOps; ++I) {
+      MachineOperand &Op = MI.getOperand(I);
+      if (Op.isReg() && Op.isKill())
+        LV->replaceKillInstruction(Op.getReg(), MI, NewMI);
+    }
+  }
+}
+
 MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                  MachineInstr &MI,
                                                  LiveVariables *LV) const {
@@ -2888,43 +2901,53 @@
   const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
   const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
   const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
+  MachineInstrBuilder MIB;
 
   if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
       // If we have an SGPR input, we will violate the constant bus restriction.
-      (ST.getConstantBusLimit(Opc) > 1 ||
-       !Src0->isReg() ||
+      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
       !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
     if (auto Imm = getFoldableImm(Src2)) {
       unsigned NewOpc =
-          IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
-                : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
-      if (pseudoToMCOpcode(NewOpc) != -1)
-        return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
-            .add(*Dst)
-            .add(*Src0)
-            .add(*Src1)
-            .addImm(Imm);
-    }
-    unsigned NewOpc =
-        IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
-              : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
+          IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
+                : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
+      if (pseudoToMCOpcode(NewOpc) != -1) {
+        MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+                  .add(*Dst)
+                  .add(*Src0)
+                  .add(*Src1)
+                  .addImm(Imm);
+        updateLiveVariables(LV, MI, *MIB);
+        return MIB;
+      }
+    }
+    unsigned NewOpc = IsFMA
+                          ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
+                          : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
     if (auto Imm = getFoldableImm(Src1)) {
-      if (pseudoToMCOpcode(NewOpc) != -1)
-        return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
-            .add(*Dst)
-            .add(*Src0)
-            .addImm(Imm)
-            .add(*Src2);
+      if (pseudoToMCOpcode(NewOpc) != -1) {
+        MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+                  .add(*Dst)
+                  .add(*Src0)
+                  .addImm(Imm)
+                  .add(*Src2);
+        updateLiveVariables(LV, MI, *MIB);
+        return MIB;
+      }
     }
     if (auto Imm = getFoldableImm(Src0)) {
       if (pseudoToMCOpcode(NewOpc) != -1 &&
-          isOperandLegal(MI, AMDGPU::getNamedOperandIdx(NewOpc,
-                           AMDGPU::OpName::src0), Src1))
-        return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
-            .add(*Dst)
-            .add(*Src1)
-            .addImm(Imm)
-            .add(*Src2);
+          isOperandLegal(
+              MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
+              Src1)) {
+        MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+                  .add(*Dst)
+                  .add(*Src1)
+                  .addImm(Imm)
+                  .add(*Src2);
+        updateLiveVariables(LV, MI, *MIB);
+        return MIB;
+      }
     }
   }
 
@@ -2933,16 +2956,18 @@
   if (pseudoToMCOpcode(NewOpc) == -1)
     return nullptr;
 
-  return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
-      .add(*Dst)
-      .addImm(Src0Mods ? Src0Mods->getImm() : 0)
-      .add(*Src0)
-      .addImm(Src1Mods ? Src1Mods->getImm() : 0)
-      .add(*Src1)
-      .addImm(0) // Src mods
-      .add(*Src2)
-      .addImm(Clamp ? Clamp->getImm() : 0)
-      .addImm(Omod ? Omod->getImm() : 0);
+  MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+            .add(*Dst)
+            .addImm(Src0Mods ? Src0Mods->getImm() : 0)
+            .add(*Src0)
+            .addImm(Src1Mods ? Src1Mods->getImm() : 0)
+            .add(*Src1)
+            .addImm(0) // Src mods
+            .add(*Src2)
+            .addImm(Clamp ? Clamp->getImm() : 0)
+            .addImm(Omod ? Omod->getImm() : 0);
+  updateLiveVariables(LV, MI, *MIB);
+  return MIB;
 }
 
 // It's not generally safe to move VALU instructions across these since it will
diff --git a/llvm/test/CodeGen/AMDGPU/stale-livevar-in-twoaddr-pass.mir b/llvm/test/CodeGen/AMDGPU/stale-livevar-in-twoaddr-pass.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/stale-livevar-in-twoaddr-pass.mir
@@ -0,0 +1,36 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=livevars,phi-node-elimination,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck %s
+# This used to fail under ASAN enabled build because we didn't update LiveVariables in SIInstrInfo::convertToThreeAddress()
+# CHECK: _amdgpu_ps_main
+
+---
+name: _amdgpu_ps_main
+alignment: 1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr2, $vgpr2, $vgpr3
+
+    %0:vgpr_32 = COPY $vgpr3
+    %1:vgpr_32 = COPY $vgpr2
+    S_BRANCH %bb.3
+
+  bb.1:
+    %2:vgpr_32 = V_MAC_F32_e32 0, %0, %1, implicit $mode, implicit $exec
+    %3:vgpr_32 = V_MED3_F32 0, %1, 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
+
+  bb.2:
+    %4:vgpr_32 = PHI %5, %bb.3, %3, %bb.1
+    SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    EXP_DONE 0, %4, %4, %4, %4, -1, 0, 15, implicit $exec
+    S_ENDPGM 0
+
+  bb.3:
+    successors: %bb.1, %bb.2
+
+    %5:vgpr_32 = V_MAC_F32_e32 0, %1, %0, implicit $mode, implicit $exec
+    %7:vgpr_32 = V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
+    %8:sreg_64 = V_CMP_NE_U32_e64 1, %7, implicit $exec
+    %6:sreg_64 = SI_IF %8, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+...
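
Note (illustrative sketch, not part of the patch): the core of the fix is the small helper added ahead of convertToThreeAddress(). When the two-address pseudo MI is replaced by a new three-address instruction, every register whose kill was recorded on MI must have that kill retargeted to the replacement; otherwise LiveVariables keeps a dangling pointer to the erased instruction, which a later pass in the test's pipeline (phi-node-elimination queries LiveVariables) can dereference. The compact restatement below assumes only the upstream LiveVariables::replaceKillInstruction() API; the function name transferKillsToNewInstr is hypothetical and chosen for clarity.

// Sketch only: mirrors updateLiveVariables() from the patch above.
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Retarget every kill recorded on MI to NewMI. Operand 0 is MI's def, so the
// walk starts at operand 1; for each register operand carrying a kill flag,
// replaceKillInstruction() swaps the stale MI pointer in LiveVariables'
// bookkeeping for the newly built instruction.
static void transferKillsToNewInstr(LiveVariables *LV, MachineInstr &MI,
                                    MachineInstr &NewMI) {
  if (!LV)
    return; // LiveVariables is optional in this hook; nothing to update.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    MachineOperand &Op = MI.getOperand(I);
    if (Op.isReg() && Op.isKill())
      LV->replaceKillInstruction(Op.getReg(), MI, NewMI);
  }
}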