diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -692,6 +692,10 @@
   int VAddrArgIndex = !add(NumDataArgs, NumDmaskArgs);
   int GradientArgIndex = !add(NumDataArgs, NumDmaskArgs, NumExtraAddrArgs);
   int CoordArgIndex = !add(NumDataArgs, NumDmaskArgs, NumExtraAddrArgs, NumGradientArgs);
+  int LodArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, -1);
+  int MipArgIndex = LodArgIndex;
+  int RsrcArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs);
+  int SampArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs);
   int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
   int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
   int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
 
-#include "AMDGPUInstrInfo.h"
 #include "llvm/CodeGen/Register.h"
 #include <utility>
 
@@ -26,36 +25,6 @@
 
 bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask);
 
-/// Return number of address arguments, and the number of gradients for an image
-/// intrinsic.
-inline std::pair<int, int>
-getImageNumVAddr(const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
-                 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode) {
-  const AMDGPU::MIMGDimInfo *DimInfo
-    = AMDGPU::getMIMGDimInfo(ImageDimIntr->Dim);
-
-  int NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
-  int NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
-  int NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
-  int NumVAddr = BaseOpcode->NumExtraArgs + NumGradients + NumCoords + NumLCM;
-  return {NumVAddr, NumGradients};
-}
-
-/// Return index of dmask in an gMIR image intrinsic
-inline int getDMaskIdx(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode,
-                       int NumDefs) {
-  assert(!BaseOpcode->Atomic);
-  return NumDefs + 1 + (BaseOpcode->Store ? 1 : 0);
-}
-
-/// Return first address operand index in a gMIR image intrinsic.
-inline int getImageVAddrIdxBegin(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode,
-                                 int NumDefs) {
-  if (BaseOpcode->Atomic)
-    return NumDefs + 1 + (BaseOpcode->AtomicX2 ? 2 : 1);
-  return getDMaskIdx(BaseOpcode, NumDefs) + 1;
-}
-
 }
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -52,9 +52,26 @@
   unsigned Intr;
   unsigned BaseOpcode;
   MIMGDim Dim;
+
+  unsigned NumGradients;
+  unsigned NumDmask;
+  unsigned NumData;
+  unsigned NumVAddrs;
+  unsigned NumArgs;
+
+  unsigned DMaskIndex;
+  unsigned VAddrStart;
   unsigned GradientStart;
   unsigned CoordStart;
+  unsigned LodIndex;
+  unsigned MipIndex;
   unsigned VAddrEnd;
+  unsigned RsrcIndex;
+  unsigned SampIndex;
+  unsigned UnormIndex;
+  unsigned TexFailCtrlIndex;
+  unsigned CachePolicyIndex;
+
   unsigned GradientTyArg;
   unsigned CoordTyArg;
 };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1469,34 +1469,27 @@
   unsigned IntrOpcode = Intr->BaseOpcode;
   const bool IsGFX10 = STI.getGeneration() >= AMDGPUSubtarget::GFX10;
 
-  const int VAddrIdx = getImageVAddrIdxBegin(BaseOpcode,
-                                             MI.getNumExplicitDefs());
-  int NumVAddr, NumGradients;
-  std::tie(NumVAddr, NumGradients) = getImageNumVAddr(Intr, BaseOpcode);
+  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
 
   Register VDataIn, VDataOut;
   LLT VDataTy;
   int NumVDataDwords = -1;
   bool IsD16 = false;
 
-  // XXX - Can we just get the second to last argument for ctrl?
-  unsigned CtrlIdx; // Index of texfailctrl argument
   bool Unorm;
-  if (!BaseOpcode->Sampler) {
+  if (!BaseOpcode->Sampler)
     Unorm = true;
-    CtrlIdx = VAddrIdx + NumVAddr + 1;
-  } else {
-    Unorm = MI.getOperand(VAddrIdx + NumVAddr + 2).getImm() != 0;
-    CtrlIdx = VAddrIdx + NumVAddr + 3;
-  }
+  else
+    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
 
   bool TFE;
   bool LWE;
   bool IsTexFail = false;
-  if (!parseTexFail(MI.getOperand(CtrlIdx).getImm(), TFE, LWE, IsTexFail))
+  if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
+                    TFE, LWE, IsTexFail))
     return false;
 
-  const int Flags = MI.getOperand(CtrlIdx + 2).getImm();
+  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
   const bool IsA16 = (Flags & 1) != 0;
   const bool IsG16 = (Flags & 2) != 0;
@@ -1527,9 +1520,7 @@
       NumVDataDwords = Is64Bit ? 2 : 1;
     }
   } else {
-    const int DMaskIdx = 2; // Input/output + intrinsic ID.
-
-    DMask = MI.getOperand(DMaskIdx).getImm();
+    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
     DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
 
     if (BaseOpcode->Store) {
@@ -1560,7 +1551,7 @@
   if (LZMappingInfo) {
     // The legalizer replaced the register with an immediate 0 if we need to
     // change the opcode.
-    const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1);
+    const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->LodIndex);
     if (Lod.isImm()) {
       assert(Lod.getImm() == 0);
       IntrOpcode = LZMappingInfo->LZ;  // set new opcode to _lz variant of _l
@@ -1569,7 +1560,7 @@
 
   // Optimize _mip away, when 'lod' is zero
   if (MIPMappingInfo) {
-    const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1);
+    const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->MipIndex);
     if (Lod.isImm()) {
       assert(Lod.getImm() == 0);
       IntrOpcode = MIPMappingInfo->NONMIP;  // set new opcode to variant without _mip
@@ -1592,20 +1583,22 @@
   bool DLC = false;
   if (BaseOpcode->Atomic) {
     GLC = true; // TODO no-return optimization
-    if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), nullptr, &SLC,
-                          IsGFX10 ? &DLC : nullptr))
+    if (!parseCachePolicy(
+            MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), nullptr,
+            &SLC, IsGFX10 ? &DLC : nullptr))
       return false;
   } else {
-    if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), &GLC, &SLC,
-                          IsGFX10 ? &DLC : nullptr))
+    if (!parseCachePolicy(
+            MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), &GLC,
+            &SLC, IsGFX10 ? &DLC : nullptr))
       return false;
   }
 
   int NumVAddrRegs = 0;
   int NumVAddrDwords = 0;
-  for (int I = 0; I < NumVAddr; ++I) {
+  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
     // Skip the $noregs and 0s inserted during legalization.
-    MachineOperand &AddrOp = MI.getOperand(VAddrIdx + I);
+    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
     if (!AddrOp.isReg())
       continue; // XXX - Break?
@@ -1668,17 +1661,17 @@
   if (VDataIn)
     MIB.addReg(VDataIn); // vdata input
 
-  for (int i = 0; i != NumVAddrRegs; ++i) {
-    MachineOperand &SrcOp = MI.getOperand(VAddrIdx + i);
+  for (int I = 0; I != NumVAddrRegs; ++I) {
+    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
     if (SrcOp.isReg()) {
       assert(SrcOp.getReg() != 0);
       MIB.addReg(SrcOp.getReg());
     }
   }
 
-  MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr).getReg()); // rsrc
+  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
   if (BaseOpcode->Sampler)
-    MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr + 1).getReg()); // sampler
+    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
 
   MIB.addImm(DMask); // dmask
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3783,38 +3783,39 @@
 /// Turn a set of s16 typed registers in \p A16AddrRegs into a dword sized
 /// vector with s16 typed elements.
-static void packImageA16AddressToDwords(MachineIRBuilder &B, MachineInstr &MI,
-                                        SmallVectorImpl<Register> &PackedAddrs,
-                                        int AddrIdx, int DimIdx, int EndIdx,
-                                        int NumGradients) {
+static void packImageA16AddressToDwords(
+    MachineIRBuilder &B, MachineInstr &MI,
+    SmallVectorImpl<Register> &PackedAddrs, unsigned ArgOffset,
+    const AMDGPU::ImageDimIntrinsicInfo *Intr, unsigned EndIdx) {
   const LLT S16 = LLT::scalar(16);
   const LLT V2S16 = LLT::vector(2, 16);
-  for (int I = AddrIdx; I < EndIdx; ++I) {
-    MachineOperand &SrcOp = MI.getOperand(I);
+  for (unsigned I = Intr->VAddrStart; I < EndIdx; I++) {
+    MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);
     if (!SrcOp.isReg())
       continue; // _L to _LZ may have eliminated this.
 
     Register AddrReg = SrcOp.getReg();
 
-    if (I < DimIdx) {
+    if (I < Intr->GradientStart) {
       AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
       PackedAddrs.push_back(AddrReg);
     } else {
       // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
       // in 1D, derivatives dx/dh and dx/dv are packed with undef.
       if (((I + 1) >= EndIdx) ||
-          ((NumGradients / 2) % 2 == 1 &&
-           (I == DimIdx + (NumGradients / 2) - 1 ||
-            I == DimIdx + NumGradients - 1)) ||
+          ((Intr->NumGradients / 2) % 2 == 1 &&
+           (I == Intr->GradientStart + (Intr->NumGradients / 2) - 1 ||
+            I == Intr->GradientStart + Intr->NumGradients - 1)) ||
           // Check for _L to _LZ optimization
-          !MI.getOperand(I + 1).isReg()) {
+          !MI.getOperand(ArgOffset + I + 1).isReg()) {
         PackedAddrs.push_back(
            B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)})
                .getReg(0));
       } else {
         PackedAddrs.push_back(
-            B.buildBuildVector(V2S16, {AddrReg, MI.getOperand(I + 1).getReg()})
+            B.buildBuildVector(
+                 V2S16, {AddrReg, MI.getOperand(ArgOffset + I + 1).getReg()})
                 .getReg(0));
         ++I;
       }
@@ -3873,43 +3874,37 @@
 /// the intrinsic's arguments. In cases like a16 addreses, this requires padding
 /// now unnecessary arguments with $noreg.
 bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
-    MachineInstr &MI, MachineIRBuilder &B,
-    GISelChangeObserver &Observer,
-    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const {
+    MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
+    const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
 
-  const int NumDefs = MI.getNumExplicitDefs();
+  const unsigned NumDefs = MI.getNumExplicitDefs();
+  const unsigned ArgOffset = NumDefs + 1;
   bool IsTFE = NumDefs == 2;
   // We are only processing the operands of d16 image operations on subtargets
   // that use the unpacked register layout, or need to repack the TFE result.
 
   // TODO: Do we need to guard against already legalized intrinsics?
   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
-      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
+      AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
 
   MachineRegisterInfo *MRI = B.getMRI();
   const LLT S32 = LLT::scalar(32);
   const LLT S16 = LLT::scalar(16);
   const LLT V2S16 = LLT::vector(2, 16);
 
-  // Index of first address argument
-  const int AddrIdx = getImageVAddrIdxBegin(BaseOpcode, NumDefs);
-
-  int NumVAddrs, NumGradients;
-  std::tie(NumVAddrs, NumGradients) = getImageNumVAddr(ImageDimIntr, BaseOpcode);
-  const int DMaskIdx = BaseOpcode->Atomic ? -1 :
-    getDMaskIdx(BaseOpcode, NumDefs);
   unsigned DMask = 0;
 
   // Check for 16 bit addresses and pack if true.
-  int DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
-  LLT GradTy = MRI->getType(MI.getOperand(DimIdx).getReg());
-  LLT AddrTy = MRI->getType(MI.getOperand(DimIdx + NumGradients).getReg());
+  LLT GradTy =
+      MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg());
+  LLT AddrTy =
+      MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg());
   const bool IsG16 = GradTy == S16;
   const bool IsA16 = AddrTy == S16;
 
   int DMaskLanes = 0;
   if (!BaseOpcode->Atomic) {
-    DMask = MI.getOperand(DMaskIdx).getImm();
+    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
     if (BaseOpcode->Gather4) {
       DMaskLanes = 4;
     } else if (DMask != 0) {
@@ -3936,7 +3931,7 @@
   if (IsTFE && DMask == 0) {
     DMask = 0x1;
     DMaskLanes = 1;
-    MI.getOperand(DMaskIdx).setImm(DMask);
+    MI.getOperand(ArgOffset + Intr->DMaskIndex).setImm(DMask);
   }
 
   if (BaseOpcode->Atomic) {
@@ -3957,41 +3952,41 @@
     }
   }
 
-  int CorrectedNumVAddrs = NumVAddrs;
+  unsigned CorrectedNumVAddrs = Intr->NumVAddrs;
 
   // Optimize _L to _LZ when _L is zero
   if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
-          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
+          AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) {
     const ConstantFP *ConstantLod;
-    const int LodIdx = AddrIdx + NumVAddrs - 1;
 
-    if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_GFCst(ConstantLod))) {
+    if (mi_match(MI.getOperand(ArgOffset + Intr->LodIndex).getReg(), *MRI,
+                 m_GFCst(ConstantLod))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        // Set new opcode to _lz variant of _l, and change the intrinsic ID.
-        ImageDimIntr = AMDGPU::getImageDimInstrinsicByBaseOpcode(
-          LZMappingInfo->LZ, ImageDimIntr->Dim);
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimInstrinsicByBaseOpcode(LZMappingInfo->LZ,
+                                                      Intr->Dim);
 
        // The starting indexes should remain in the same place.
-        --NumVAddrs;
        --CorrectedNumVAddrs;
 
-        MI.getOperand(MI.getNumExplicitDefs()).setIntrinsicID(
-          static_cast<Intrinsic::ID>(ImageDimIntr->Intr));
-        MI.RemoveOperand(LodIdx);
+        MI.getOperand(MI.getNumExplicitDefs())
+            .setIntrinsicID(static_cast<Intrinsic::ID>(NewImageDimIntr->Intr));
+        MI.RemoveOperand(ArgOffset + Intr->LodIndex);
+        Intr = NewImageDimIntr;
      }
    }
   }
 
   // Optimize _mip away, when 'lod' is zero
-  if (AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
+  if (AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode)) {
     int64_t ConstantLod;
-    const int LodIdx = AddrIdx + NumVAddrs - 1;
-
-    if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_ICst(ConstantLod))) {
+    if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI,
+                 m_ICst(ConstantLod))) {
      if (ConstantLod == 0) {
        // TODO: Change intrinsic opcode and remove operand instead or replacing
        // it with 0, as the _L to _LZ handling is done above.
-        MI.getOperand(LodIdx).ChangeToImmediate(0);
+        MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0);
        --CorrectedNumVAddrs;
      }
    }
@@ -4006,18 +4001,17 @@
   } else if (!ST.hasG16())
     return false;
 
-  if (NumVAddrs > 1) {
+  if (Intr->NumVAddrs > 1) {
     SmallVector<Register, 8> PackedRegs;
     // Don't compress addresses for G16
-    const int PackEndIdx =
-        IsA16 ? (AddrIdx + NumVAddrs) : (DimIdx + NumGradients);
-    packImageA16AddressToDwords(B, MI, PackedRegs, AddrIdx, DimIdx,
-                                PackEndIdx, NumGradients);
+    const int PackEndIdx = IsA16 ? Intr->VAddrEnd : Intr->CoordStart;
+    packImageA16AddressToDwords(B, MI, PackedRegs, ArgOffset, Intr,
+                                PackEndIdx);
 
     if (!IsA16) {
       // Add uncompressed address
-      for (int I = DimIdx + NumGradients; I != AddrIdx + NumVAddrs; ++I) {
-        int AddrReg = MI.getOperand(I).getReg();
+      for (unsigned I = Intr->CoordStart; I < Intr->VAddrEnd; I++) {
+        int AddrReg = MI.getOperand(ArgOffset + I).getReg();
         assert(B.getMRI()->getType(AddrReg) == LLT::scalar(32));
         PackedRegs.push_back(AddrReg);
       }
@@ -4033,9 +4027,9 @@
       PackedRegs.resize(1);
     }
 
-    const int NumPacked = PackedRegs.size();
-    for (int I = 0; I != NumVAddrs; ++I) {
-      MachineOperand &SrcOp = MI.getOperand(AddrIdx + I);
+    const unsigned NumPacked = PackedRegs.size();
+    for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
+      MachineOperand &SrcOp = MI.getOperand(ArgOffset + I);
       if (!SrcOp.isReg()) {
         assert(SrcOp.isImm() && SrcOp.getImm() == 0);
         continue;
@@ -4043,8 +4037,8 @@
 
       assert(SrcOp.getReg() != AMDGPU::NoRegister);
 
-      if (I < NumPacked)
-        SrcOp.setReg(PackedRegs[I]);
+      if (I - Intr->VAddrStart < NumPacked)
+        SrcOp.setReg(PackedRegs[I - Intr->VAddrStart]);
       else
         SrcOp.setReg(AMDGPU::NoRegister);
     }
@@ -4063,8 +4057,9 @@
     // allocation when possible.
     const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding();
 
-    if (!UseNSA && NumVAddrs > 1)
-      convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs);
+    if (!UseNSA && Intr->NumVAddrs > 1)
+      convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart,
+                               Intr->NumVAddrs);
   }
 
   int Flags = 0;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -842,9 +842,25 @@
   AMDGPUDimProps Dim = I.P.Dim;
   AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval<I.P>;
 
+  bits<8> NumGradients = DimEval.NumGradientArgs;
+  bits<8> NumDmask = DimEval.NumDmaskArgs;
+  bits<8> NumData = DimEval.NumDataArgs;
+  bits<8> NumVAddrs = DimEval.NumVAddrArgs;
+  bits<8> NumArgs = !add(DimEval.CachePolicyArgIndex, 1);
+
+  bits<8> DMaskIndex = DimEval.DmaskArgIndex;
+  bits<8> VAddrStart = DimEval.VAddrArgIndex;
   bits<8> GradientStart = DimEval.GradientArgIndex;
   bits<8> CoordStart = DimEval.CoordArgIndex;
+  bits<8> LodIndex = DimEval.LodArgIndex;
+  bits<8> MipIndex = DimEval.MipArgIndex;
   bits<8> VAddrEnd = !add(DimEval.VAddrArgIndex, DimEval.NumVAddrArgs);
+  bits<8> RsrcIndex = DimEval.RsrcArgIndex;
+  bits<8> SampIndex = DimEval.SampArgIndex;
+  bits<8> UnormIndex = DimEval.UnormArgIndex;
+  bits<8> TexFailCtrlIndex = DimEval.TexFailCtrlArgIndex;
+  bits<8> CachePolicyIndex = DimEval.CachePolicyArgIndex;
+
   bits<8> GradientTyArg = !add(I.P.NumRetAndDataAnyTypes,
     !foldl(0, I.P.ExtraAddrArgs, cnt, arg, !add(cnt, arg.Type.isAny)));
   bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0));
@@ -852,7 +868,10 @@
 
 def ImageDimIntrinsicTable : GenericTable {
   let FilterClass = "ImageDimIntrinsicInfo";
-  let Fields = ["Intr", "BaseOpcode", "Dim", "GradientStart", "CoordStart", "VAddrEnd", "GradientTyArg", "CoordTyArg"];
+  let Fields = ["Intr", "BaseOpcode", "Dim", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
+                "DMaskIndex", "VAddrStart", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
+                "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
+                "GradientTyArg", "CoordTyArg"];
   GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
   GenericEnum TypeOf_Dim = MIMGDim;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -59,7 +59,7 @@
   SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
                                  MVT VT, unsigned Offset) const;
   SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
-                     SelectionDAG &DAG) const;
+                     SelectionDAG &DAG, bool WithChain) const;
   SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
                        SDValue CachePolicy, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5848,7 +5848,7 @@
 SDValue SITargetLowering::lowerImage(SDValue Op,
                                      const AMDGPU::ImageDimIntrinsicInfo *Intr,
-                                     SelectionDAG &DAG) const {
+                                     SelectionDAG &DAG, bool WithChain) const {
   SDLoc DL(Op);
   MachineFunction &MF = DAG.getMachineFunction();
   const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
@@ -5871,7 +5871,9 @@
   int NumVDataDwords;
   bool AdjustRetType = false;
 
-  unsigned AddrIdx; // Index of first address argument
+  // Offset of intrinsic arguments
+  const unsigned ArgOffset = WithChain ? 2 : 1;
+
   unsigned DMask;
   unsigned DMaskLanes = 0;
@@ -5889,15 +5891,13 @@
       ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
       DMask = Is64Bit ? 0xf : 0x3;
       NumVDataDwords = Is64Bit ? 4 : 2;
-      AddrIdx = 4;
     } else {
       DMask = Is64Bit ? 0x3 : 0x1;
       NumVDataDwords = Is64Bit ? 2 : 1;
-      AddrIdx = 3;
     }
   } else {
-    unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
-    auto DMaskConst = cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
+    auto DMaskConst =
+        cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->DMaskIndex));
     DMask = DMaskConst->getZExtValue();
     DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
@@ -5937,56 +5937,45 @@
       AdjustRetType = true;
     }
-
-    AddrIdx = DMaskIdx + 1;
   }
 
-  unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
-  unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
-  unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
-  unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
-                       NumCoords + NumLCM;
-  unsigned NumMIVAddrs = NumVAddrs;
-
+  unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;
   SmallVector<SDValue, 16> VAddrs;
 
   // Optimize _L to _LZ when _L is zero
   if (LZMappingInfo) {
-    if (auto ConstantLod =
-         dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+    if (auto ConstantLod = dyn_cast<ConstantFPSDNode>(
+            Op.getOperand(ArgOffset + Intr->LodIndex))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        IntrOpcode = LZMappingInfo->LZ;  // set new opcode to _lz variant of _l
-        NumMIVAddrs--;                  // remove 'lod'
+        VAddrEnd--;                     // remove 'lod'
      }
    }
   }
 
   // Optimize _mip away, when 'lod' is zero
   if (MIPMappingInfo) {
-    if (auto ConstantLod =
-         dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+    if (auto ConstantLod = dyn_cast<ConstantSDNode>(
+            Op.getOperand(ArgOffset + Intr->MipIndex))) {
      if (ConstantLod->isNullValue()) {
        IntrOpcode = MIPMappingInfo->NONMIP;  // set new opcode to variant without _mip
-        NumMIVAddrs--;                       // remove 'lod'
+        VAddrEnd--;                          // remove 'mip'
      }
    }
   }
 
   // Push back extra arguments.
-  for (unsigned I = 0; I < BaseOpcode->NumExtraArgs; I++)
-    VAddrs.push_back(Op.getOperand(AddrIdx + I));
+  for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++)
+    VAddrs.push_back(Op.getOperand(ArgOffset + I));
 
   // Check for 16 bit addresses or derivatives and pack if true.
-  unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
-  unsigned CoordIdx = DimIdx + NumGradients;
-  unsigned CoordsEnd = AddrIdx + NumMIVAddrs;
-
-  MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
+  MVT VAddrVT =
+      Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
   MVT VAddrScalarVT = VAddrVT.getScalarType();
   MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
   IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
 
-  VAddrVT = Op.getOperand(CoordIdx).getSimpleValueType();
+  VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType();
   VAddrScalarVT = VAddrVT.getScalarType();
   IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
   if (IsA16 || IsG16) {
@@ -6021,17 +6010,18 @@
     }
 
     // Don't compress addresses for G16
-    const int PackEndIdx = IsA16 ? CoordsEnd : CoordIdx;
-    packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, DimIdx,
-                                PackEndIdx, NumGradients);
+    const int PackEndIdx = IsA16 ? VAddrEnd : (ArgOffset + Intr->CoordStart);
+    packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs,
+                                ArgOffset + Intr->GradientStart, PackEndIdx,
+                                Intr->NumGradients);
 
     if (!IsA16) {
       // Add uncompressed address
-      for (unsigned I = CoordIdx; I < CoordsEnd; I++)
+      for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++)
         VAddrs.push_back(Op.getOperand(I));
     }
   } else {
-    for (unsigned I = DimIdx; I < CoordsEnd; I++)
+    for (unsigned I = ArgOffset + Intr->GradientStart; I < VAddrEnd; I++)
       VAddrs.push_back(Op.getOperand(I));
   }
@@ -6054,22 +6044,19 @@
   SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
   SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
 
-  unsigned CtrlIdx; // Index of texfailctrl argument
   SDValue Unorm;
   if (!BaseOpcode->Sampler) {
     Unorm = True;
-    CtrlIdx = AddrIdx + NumVAddrs + 1;
   } else {
     auto UnormConst =
-      cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
+        cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->UnormIndex));
 
     Unorm = UnormConst->getZExtValue() ? True : False;
-    CtrlIdx = AddrIdx + NumVAddrs + 3;
   }
 
   SDValue TFE;
   SDValue LWE;
-  SDValue TexFail = Op.getOperand(CtrlIdx);
+  SDValue TexFail = Op.getOperand(ArgOffset + Intr->TexFailCtrlIndex);
   bool IsTexFail = false;
   if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
     return Op;
@@ -6116,12 +6103,12 @@
   SDValue DLC;
   if (BaseOpcode->Atomic) {
     GLC = True; // TODO no-return optimization
-    if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC,
-                          IsGFX10 ? &DLC : nullptr))
+    if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
+                          DAG, nullptr, &SLC, IsGFX10 ? &DLC : nullptr))
       return Op;
   } else {
-    if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC,
-                          IsGFX10 ? &DLC : nullptr))
+    if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
+                          DAG, &GLC, &SLC, IsGFX10 ? &DLC : nullptr))
       return Op;
   }
@@ -6134,9 +6121,9 @@
   } else {
     Ops.push_back(VAddr);
   }
-  Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
+  Ops.push_back(Op.getOperand(ArgOffset + Intr->RsrcIndex));
   if (BaseOpcode->Sampler)
-    Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
+    Ops.push_back(Op.getOperand(ArgOffset + Intr->SampIndex));
   Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
   if (IsGFX10)
     Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
@@ -6617,7 +6604,7 @@
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
            AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
-      return lowerImage(Op, ImageDimIntr, DAG);
+      return lowerImage(Op, ImageDimIntr, DAG, false);
 
    return Op;
  }
@@ -7172,7 +7159,7 @@
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
            AMDGPU::getImageDimIntrinsicInfo(IntrID))
-      return lowerImage(Op, ImageDimIntr, DAG);
+      return lowerImage(Op, ImageDimIntr, DAG, true);
 
    return SDValue();
  }
@@ -7530,7 +7517,7 @@
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
            AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
-      return lowerImage(Op, ImageDimIntr, DAG);
+      return lowerImage(Op, ImageDimIntr, DAG, true);
 
    return Op;
  }
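
The common pattern the patch converges on: each consumer computes a per-node argument offset (explicit defs plus the intrinsic-ID operand on the gMIR side, 1 or 2 operands on the SelectionDAG side depending on the chain) and adds the tablegen-computed field from AMDGPU::ImageDimIntrinsicInfo. A minimal sketch of that access pattern on the gMIR side is below; it only restates what the selector and legalizer above do inline, and the helper name getTexFailCtrlImm is hypothetical, not part of the patch.

// Illustrative sketch only (hypothetical helper, not added by this patch):
// shows how the table-driven indices are consumed for a G_INTRINSIC* node.
#include "AMDGPUInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

static int64_t getTexFailCtrlImm(const MachineInstr &MI,
                                 const AMDGPU::ImageDimIntrinsicInfo *Intr) {
  // G_INTRINSIC* operands are laid out as: explicit defs, intrinsic ID, then
  // the IR arguments in order, so every table index is shifted by NumDefs + 1.
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  return MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm();
}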