diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -50,7 +50,6 @@ FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); FunctionPass *createSILowerI1CopiesPass(); -FunctionPass *createSIAddIMGInitPass(); FunctionPass *createSIShrinkInstructionsPass(); FunctionPass *createSILoadStoreOptimizerPass(); FunctionPass *createSIWholeQuadModePass(); @@ -222,9 +221,6 @@ void initializeAMDGPUUseNativeCallsPass(PassRegistry &); extern char &AMDGPUUseNativeCallsID; -void initializeSIAddIMGInitPass(PassRegistry &); -extern char &SIAddIMGInitID; - void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); extern char &AMDGPUPerfHintAnalysisID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -264,7 +264,6 @@ initializeAMDGPUPrintfRuntimeBindingPass(*PR); initializeGCNRegBankReassignPass(*PR); initializeGCNNSAReassignPass(*PR); - initializeSIAddIMGInitPass(*PR); } static std::unique_ptr createTLOF(const Triple &TT) { diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -110,7 +110,6 @@ R600OptimizeVectorRegisters.cpp R600Packetizer.cpp R600RegisterInfo.cpp - SIAddIMGInit.cpp SIAnnotateControlFlow.cpp SIFixSGPRCopies.cpp SIFixVGPRCopies.cpp diff --git a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp deleted file mode 100644 --- a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp +++ /dev/null @@ -1,169 +0,0 @@ -//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Any MIMG instructions that use tfe or lwe require an initialization of the -/// result register that will be written in the case of a memory access failure -/// The required code is also added to tie this init code to the result of the -/// img instruction -/// -//===----------------------------------------------------------------------===// -// - -#include "AMDGPU.h" -#include "GCNSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/CodeGen/MachineFunctionPass.h" - -#define DEBUG_TYPE "si-img-init" - -using namespace llvm; - -namespace { - -class SIAddIMGInit : public MachineFunctionPass { -public: - static char ID; - -public: - SIAddIMGInit() : MachineFunctionPass(ID) { - initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; - -} // End anonymous namespace. - -INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) - -char SIAddIMGInit::ID = 0; - -char &llvm::SIAddIMGInitID = SIAddIMGInit::ID; - -FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } - -bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIInstrInfo *TII = ST.getInstrInfo(); - const SIRegisterInfo *RI = ST.getRegisterInfo(); - bool Changed = false; - - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; - ++BI) { - MachineBasicBlock &MBB = *BI; - MachineBasicBlock::iterator I, Next; - for (I = MBB.begin(); I != MBB.end(); I = Next) { - Next = std::next(I); - MachineInstr &MI = *I; - - auto Opcode = MI.getOpcode(); - if (TII->isMIMG(Opcode) && !MI.mayStore()) { - MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); - MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); - MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); - - if (!TFE && !LWE) // intersect_ray - continue; - - unsigned TFEVal = TFE ? TFE->getImm() : 0; - unsigned LWEVal = LWE->getImm(); - unsigned D16Val = D16 ? D16->getImm() : 0; - - if (TFEVal || LWEVal) { - // At least one of TFE or LWE are non-zero - // We have to insert a suitable initialization of the result value and - // tie this to the dest of the image instruction. - - const DebugLoc &DL = MI.getDebugLoc(); - - int DstIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); - - // Calculate which dword we have to initialize to 0. - MachineOperand *MO_Dmask = - TII->getNamedOperand(MI, AMDGPU::OpName::dmask); - - // check that dmask operand is found. - assert(MO_Dmask && "Expected dmask operand in instruction"); - - unsigned dmask = MO_Dmask->getImm(); - // Determine the number of active lanes taking into account the - // Gather4 special case - unsigned ActiveLanes = - TII->isGather4(Opcode) ? 4 : countPopulation(dmask); - - bool Packed = !ST.hasUnpackedD16VMem(); - - unsigned InitIdx = - D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; - - // Abandon attempt if the dst size isn't large enough - // - this is in fact an error but this is picked up elsewhere and - // reported correctly. - uint32_t DstSize = - RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; - if (DstSize < InitIdx) - continue; - - // Create a register for the intialization value. - Register PrevDst = - MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); - unsigned NewDst = 0; // Final initialized value will be in here - - // If PRTStrictNull feature is enabled (the default) then initialize - // all the result registers to 0, otherwise just the error indication - // register (VGPRn+1) - unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; - unsigned CurrIdx = ST.usePRTStrictNull() ? 0 : (InitIdx - 1); - - if (DstSize == 1) { - // In this case we can just initialize the result directly - BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) - .addImm(0); - NewDst = PrevDst; - } else { - BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); - for (; SizeLeft; SizeLeft--, CurrIdx++) { - NewDst = - MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); - // Initialize dword - Register SubReg = - MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) - .addImm(0); - // Insert into the super-reg - BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) - .addReg(PrevDst) - .addReg(SubReg) - .addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx)); - - PrevDst = NewDst; - } - } - - // Add as an implicit operand - MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); - - // Tie the just added implicit operand to the dst - MI.tieOperands(DstIdx, MI.getNumOperands() - 1); - - Changed = true; - } - } - } - } - - return Changed; -}