Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -606,6 +606,7 @@ // Represent texture/image types / dimensionality. class AMDGPUDimProps coord_names, list slice_names> { + AMDGPUDimProps Dim = !cast(NAME); string Name = name; // e.g. "2darraymsaa" bit DA = 0; // DA bit in MIMG encoding @@ -617,6 +618,9 @@ makeArgList.ret; + + bits<8> NumCoords = !size(CoordSliceArgs); + bits<8> NumGradients = !size(GradientArgs); } def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>; Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -57,6 +57,7 @@ namespace AMDGPU { #define GET_MIMGBASEOPCODE_DECL +#define GET_MIMGDIM_DECL #define GET_MIMGENCODING_DECL #include "AMDGPUGenSearchableTables.inc" @@ -73,6 +74,36 @@ }; const D16ImageDimIntrinsic *lookupD16ImageDimIntrinsic(unsigned Intr); +struct MIMGBaseOpcodeInfo { + MIMGBaseOpcode BaseOpcode; + bool Store; + bool Atomic; + bool AtomicX2; + bool Sampler; + + uint8_t NumExtraArgs; + bool Gradients; + bool Coordinates; + bool LodOrClampOrMip; + bool HasD16; +}; +const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); + +struct MIMGDimInfo { + MIMGDim Dim; + uint8_t NumCoords; + uint8_t NumGradients; + bool DA; +}; +const MIMGDimInfo *getMIMGDimInfo(unsigned Dim); + +struct ImageDimIntrinsicInfo { + unsigned Intr; + unsigned BaseOpcode; + MIMGDim Dim; +}; +const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr); + } // end AMDGPU namespace } // End llvm namespace Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -28,10 +28,11 @@ namespace llvm { namespace AMDGPU { -#define GET_RSRCINTRINSICS_IMPL -#include "AMDGPUGenSearchableTables.inc" - #define GET_D16IMAGEDIMINTRINSICS_IMPL +#define GET_IMAGEDIMINTRINSICTABLE_IMPL +#define GET_MIMGBASEOPCODESTABLE_IMPL +#define GET_MIMGDIMINFOTABLE_IMPL +#define GET_RSRCINTRINSICS_IMPL #include "AMDGPUGenSearchableTables.inc" } } Index: lib/Target/AMDGPU/AMDGPUSearchableTables.td =================================================================== --- lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -91,22 +91,3 @@ foreach intr = AMDGPUImageDimAtomicIntrinsics in def : SourceOfDivergence; - -class D16ImageDimIntrinsic { - Intrinsic Intr = intr; - code D16HelperIntr = - !cast("AMDGPUIntrinsic::SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name); -} - -def D16ImageDimIntrinsics : GenericTable { - let FilterClass = "D16ImageDimIntrinsic"; - let Fields = ["Intr", "D16HelperIntr"]; - - let PrimaryKey = ["Intr"]; - let PrimaryKeyName = "lookupD16ImageDimIntrinsic"; -} - -foreach intr = !listconcat(AMDGPUImageDimIntrinsics, - AMDGPUImageDimGatherIntrinsics) in { - def : D16ImageDimIntrinsic; -} Index: lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- lib/Target/AMDGPU/MIMGInstructions.td +++ lib/Target/AMDGPU/MIMGInstructions.td @@ -27,6 +27,10 @@ // vdata/vaddr size. 
class MIMGBaseOpcode { MIMGBaseOpcode BaseOpcode = !cast(NAME); + bit Store = 0; + bit Atomic = 0; + bit AtomicX2 = 0; // (f)cmpswap + bit Sampler = 0; bits<8> NumExtraArgs = 0; bit Gradients = 0; bit Coordinates = 1; @@ -41,14 +45,29 @@ def MIMGBaseOpcodesTable : GenericTable { let FilterClass = "MIMGBaseOpcode"; let CppTypeName = "MIMGBaseOpcodeInfo"; - let Fields = ["BaseOpcode", "NumExtraArgs", "Gradients", "Coordinates", - "LodOrClampOrMip", "HasD16"]; + let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", + "NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip", + "HasD16"]; GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode; let PrimaryKey = ["BaseOpcode"]; let PrimaryKeyName = "getMIMGBaseOpcodeInfo"; } +def MIMGDim : GenericEnum { + let FilterClass = "AMDGPUDimProps"; +} + +def MIMGDimInfoTable : GenericTable { + let FilterClass = "AMDGPUDimProps"; + let CppTypeName = "MIMGDimInfo"; + let Fields = ["Dim", "NumCoords", "NumGradients", "DA"]; + GenericEnum TypeOf_Dim = MIMGDim; + + let PrimaryKey = ["Dim"]; + let PrimaryKeyName = "getMIMGDimInfo"; +} + class mimg si, bits<7> vi = si> { field bits<7> SI = si; field bits<7> VI = vi; @@ -188,6 +207,7 @@ multiclass MIMG_Store op, string asm, bit has_d16, bit mip = 0> { def "" : MIMGBaseOpcode { + let Store = 1; let LodOrClampOrMip = mip; let HasD16 = has_d16; } @@ -263,7 +283,10 @@ } multiclass MIMG_Atomic { // 64-bit atomics - def "" : MIMGBaseOpcode; + def "" : MIMGBaseOpcode { + let Atomic = 1; + let AtomicX2 = isCmpSwap; + } let BaseOpcode = !cast(NAME) in { // _V* variants have different dst size, but the size is encoded implicitly, @@ -309,6 +332,7 @@ class MIMG_Sampler_BaseOpcode : MIMGBaseOpcode { + let Sampler = 1; let NumExtraArgs = !size(sample.ExtraAddrArgs); let Gradients = sample.Gradients; let LodOrClampOrMip = !ne(sample.LodOrClamp, ""); @@ -458,197 +482,32 @@ //def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>; //def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>; -/********** ============================== **********/ -/********** Dimension-aware image patterns **********/ -/********** ============================== **********/ - -class getDwordsType { - int NumDwords = dwords; - string suffix = !if(!lt(dwords, 1), ?, - !if(!eq(dwords, 1), "_V1", - !if(!eq(dwords, 2), "_V2", - !if(!le(dwords, 4), "_V4", - !if(!le(dwords, 8), "_V8", - !if(!le(dwords, 16), "_V16", ?)))))); - ValueType VT = !if(!lt(dwords, 1), ?, - !if(!eq(dwords, 1), f32, - !if(!eq(dwords, 2), v2f32, - !if(!le(dwords, 4), v4f32, - !if(!le(dwords, 8), v8f32, - !if(!le(dwords, 16), v16f32, ?)))))); - RegisterClass VReg = !if(!lt(dwords, 1), ?, - !if(!eq(dwords, 1), VGPR_32, - !if(!eq(dwords, 2), VReg_64, - !if(!le(dwords, 4), VReg_128, - !if(!le(dwords, 8), VReg_256, - !if(!le(dwords, 16), VReg_512, ?)))))); -} +/********** ========================================= **********/ +/********** Table of dimension-aware image intrinsics **********/ +/********** ========================================= **********/ -class makeRegSequence_Fold { - int idx = i; - dag lhs = d; +class ImageDimIntrinsicInfo { + Intrinsic Intr = I; + MIMGBaseOpcode BaseOpcode = !cast(!strconcat("IMAGE_", I.P.OpMod)); + AMDGPUDimProps Dim = I.P.Dim; } -// Generate a dag node which returns a vector register of class RC into which -// the source operands given by names have been inserted (assuming that each -// name corresponds to an operand whose size is equal to a subregister). 
-class makeRegSequence names> { - dag ret = - !if(!eq(!size(names), 1), - !dag(COPY_TO_REGCLASS, [?, RC], [names[0], ?]), - !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name, - makeRegSequence_Fold< - !add(f.idx, 1), - !con((INSERT_SUBREG f.lhs), - !dag(INSERT_SUBREG, [?, !cast("sub"#f.idx)], - [name, ?]))>).lhs); -} +def ImageDimIntrinsicTable : GenericTable { + let FilterClass = "ImageDimIntrinsicInfo"; + let Fields = ["Intr", "BaseOpcode", "Dim"]; + GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode; + GenericEnum TypeOf_Dim = MIMGDim; -class ImageDimPattern : GCNPat<(undef), (undef)> { - list AddrArgs = I.P.AddrDefaultArgs; - getDwordsType AddrDwords = getDwordsType; - - MIMG MI = - !cast(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix)); - - // DAG fragment to match data arguments (vdata for store/atomic, dmask - // for non-atomic). - dag MatchDataDag = - !con(!dag(I, !foreach(arg, I.P.DataArgs, dty), - !foreach(arg, I.P.DataArgs, arg.Name)), - !if(I.P.IsAtomic, (I), (I i32:$dmask))); - - // DAG fragment to match vaddr arguments. - dag MatchAddrDag = !dag(I, !foreach(arg, AddrArgs, arg.Type.VT), - !foreach(arg, AddrArgs, arg.Name)); - - // DAG fragment to match sampler resource and unorm arguments. - dag MatchSamplerDag = !if(I.P.IsSample, (I v4i32:$sampler, i1:$unorm), (I)); - - // DAG node that generates the MI vdata for store/atomic - getDwordsType DataDwords = getDwordsType; - dag GenDataDag = - !if(I.P.IsAtomic, (MI makeRegSequence.ret), - !if(!size(I.P.DataArgs), (MI $vdata), (MI))); - - // DAG node that generates the MI vaddr - dag GenAddrDag = makeRegSequence.ret; - // DAG fragments that generate various inline flags - dag GenDmask = - !if(I.P.IsAtomic, (MI !add(!shl(1, DataDwords.NumDwords), -1)), - (MI (as_i32imm $dmask))); - dag GenGLC = - !if(I.P.IsAtomic, (MI 1), - (MI (bitextract_imm<0> $cachepolicy))); - - dag MatchIntrinsic = !con(MatchDataDag, - MatchAddrDag, - (I v8i32:$rsrc), - MatchSamplerDag, - (I 0/*texfailctrl*/, - i32:$cachepolicy)); - let PatternToMatch = - !if(!size(I.RetTypes), (dty MatchIntrinsic), MatchIntrinsic); - - bit IsCmpSwap = !and(I.P.IsAtomic, !eq(!size(I.P.DataArgs), 2)); - dag ImageInstruction = - !con(GenDataDag, - (MI GenAddrDag), - (MI $rsrc), - !if(I.P.IsSample, (MI $sampler), (MI)), - GenDmask, - !if(I.P.IsSample, (MI (as_i1imm $unorm)), (MI 1)), - GenGLC, - (MI (bitextract_imm<1> $cachepolicy), - 0, /* r128 */ - 0, /* tfe */ - 0 /*(as_i1imm $lwe)*/, - { I.P.Dim.DA }), - !if(MI.BaseOpcode.HasD16, (MI d16), (MI))); - let ResultInstrs = [ - !if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction) - ]; + let PrimaryKey = ["Intr"]; + let PrimaryKeyName = "getImageDimIntrinsicInfo"; + let PrimaryKeyEarlyOut = 1; } foreach intr = !listconcat(AMDGPUImageDimIntrinsics, - AMDGPUImageDimGetResInfoIntrinsics) in { - def intr#_pat_v1 : ImageDimPattern; - def intr#_pat_v2 : ImageDimPattern; - def intr#_pat_v4 : ImageDimPattern; -} - -// v2f16 and v4f16 are used as data types to signal that D16 should be used. -// However, they are not (always) legal types, and the SelectionDAG requires us -// to legalize them before running any patterns. So we legalize them by -// converting to an int type of equal size and using an internal 'd16helper' -// intrinsic instead which signifies both the use of D16 and actually allows -// this integer-based return type. 
-multiclass ImageDimD16Helper { - let SubtargetPredicate = HasUnpackedD16VMem in { - def _unpacked_v1 : ImageDimPattern; - def _unpacked_v2 : ImageDimPattern; - def _unpacked_v4 : ImageDimPattern; - } // End HasUnpackedD16VMem. - - let SubtargetPredicate = HasPackedD16VMem in { - def _packed_v1 : ImageDimPattern; - def _packed_v2 : ImageDimPattern; - def _packed_v4 : ImageDimPattern; - } // End HasPackedD16VMem. -} - -foreach intr = AMDGPUImageDimIntrinsics in { - def intr#_d16helper_profile : AMDGPUDimProfileCopy { - let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty); - let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg); - } - - let TargetPrefix = "SI", isTarget = 1 in - def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name : - AMDGPUImageDimIntrinsic(intr#"_d16helper_profile"), - intr.IntrProperties, intr.Properties>; - - defm intr#_d16 : - ImageDimD16Helper< - intr, !cast( - "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name)>; -} - -foreach intr = AMDGPUImageDimGatherIntrinsics in { - def intr#_pat3 : ImageDimPattern; - - def intr#_d16helper_profile : AMDGPUDimProfileCopy { - let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty); - let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg); - } - - let TargetPrefix = "SI", isTarget = 1 in - def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name : - AMDGPUImageDimIntrinsic(intr#"_d16helper_profile"), - intr.IntrProperties, intr.Properties>; - - let SubtargetPredicate = HasUnpackedD16VMem in { - def intr#_unpacked_v4 : - ImageDimPattern( - "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name), - "_V4", v4i32, 1>; - } // End HasUnpackedD16VMem. - - let SubtargetPredicate = HasPackedD16VMem in { - def intr#_packed_v4 : - ImageDimPattern( - "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name), - "_V2", v2i32, 1>; - } // End HasPackedD16VMem. 
-} - -foreach intr = AMDGPUImageDimAtomicIntrinsics in { - def intr#_pat1 : ImageDimPattern; + AMDGPUImageDimGetResInfoIntrinsics, + AMDGPUImageDimGatherIntrinsics, + AMDGPUImageDimAtomicIntrinsics) in { + def : ImageDimIntrinsicInfo; } /********** ======================= **********/ Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -42,6 +42,8 @@ SelectionDAG &DAG) const override; SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, MVT VT, unsigned Offset) const; + SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, + SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -4401,6 +4401,253 @@ return DAG.getUNDEF(VT); } +static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL, + ArrayRef Elts) { + assert(!Elts.empty()); + MVT Type; + unsigned NumElts; + + if (Elts.size() == 1) { + Type = MVT::f32; + NumElts = 1; + } else if (Elts.size() == 2) { + Type = MVT::v2f32; + NumElts = 2; + } else if (Elts.size() <= 4) { + Type = MVT::v4f32; + NumElts = 4; + } else if (Elts.size() <= 8) { + Type = MVT::v8f32; + NumElts = 8; + } else { + assert(Elts.size() <= 16); + Type = MVT::v16f32; + NumElts = 16; + } + + SmallVector VecElts(NumElts); + for (unsigned i = 0; i < Elts.size(); ++i) { + SDValue Elt = Elts[i]; + if (Elt.getValueType() != MVT::f32) + Elt = DAG.getBitcast(MVT::f32, Elt); + VecElts[i] = Elt; + } + for (unsigned i = Elts.size(); i < NumElts; ++i) + VecElts[i] = DAG.getUNDEF(MVT::f32); + + if (NumElts == 1) + return VecElts[0]; + return DAG.getBuildVector(Type, DL, VecElts); +} + +static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG, + SDValue *GLC, SDValue *SLC) { + auto CachePolicyConst = dyn_cast(CachePolicy.getNode()); + if (!CachePolicyConst) + return false; + + uint64_t Value = CachePolicyConst->getZExtValue(); + SDLoc DL(CachePolicy); + if (GLC) { + *GLC = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32); + Value &= ~(uint64_t)0x1; + } + if (SLC) { + *SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32); + Value &= ~(uint64_t)0x2; + } + + return Value == 0; +} + +SDValue SITargetLowering::lowerImage(SDValue Op, + const AMDGPU::ImageDimIntrinsicInfo *Intr, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = + AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); + const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); + + SmallVector ResultTypes(Op->value_begin(), Op->value_end()); + bool IsD16 = false; + SDValue VData; + int NumVDataDwords; + unsigned AddrIdx; // Index of first address argument + unsigned DMask; + + if (BaseOpcode->Atomic) { + VData = Op.getOperand(2); + + bool Is64Bit = VData.getValueType() == MVT::i64; + if (BaseOpcode->AtomicX2) { + SDValue VData2 = Op.getOperand(3); + VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, + DL, {VData, VData2}); + if (Is64Bit) + VData = DAG.getBitcast(MVT::v4i32, VData); + + ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32; + DMask = Is64Bit ? 0xf : 0x3; + NumVDataDwords = Is64Bit ? 
4 : 2; + AddrIdx = 4; + } else { + DMask = Is64Bit ? 0x3 : 0x1; + NumVDataDwords = Is64Bit ? 2 : 1; + AddrIdx = 3; + } + } else { + unsigned DMaskIdx; + + if (BaseOpcode->Store) { + VData = Op.getOperand(2); + + MVT StoreVT = VData.getSimpleValueType(); + if (StoreVT == MVT::f16 || StoreVT == MVT::v2f16 || StoreVT == MVT::v4f16) { + if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS || + !BaseOpcode->HasD16) + return Op; // D16 is unsupported for this instruction + + IsD16 = true; + VData = handleD16VData(VData, DAG); + } + + NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32; + DMaskIdx = 3; + } else { + MVT LoadVT = Op.getSimpleValueType(); + if (LoadVT == MVT::f16 || LoadVT == MVT::v2f16 || LoadVT == MVT::v4f16) { + if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS || + !BaseOpcode->HasD16) + return Op; // D16 is unsupported for this instruction + + IsD16 = true; + bool Unpacked = Subtarget->hasUnpackedD16VMem(); + if (LoadVT.isVector() && (Unpacked || !isTypeLegal(LoadVT))) { + // TODO simplify this for the packed case once v4f16 is legal + if (LoadVT == MVT::v2f16) { + ResultTypes[0] = Unpacked ? MVT::v2i32 : MVT::i32; + } else if (LoadVT == MVT::v4f16) { + ResultTypes[0] = Unpacked ? MVT::v4i32 : MVT::v2i32; + } + } + } + + NumVDataDwords = (ResultTypes[0].getSizeInBits() + 31) / 32; + DMaskIdx = isa(Op) ? 2 : 1; + } + + auto DMaskConst = dyn_cast(Op.getOperand(DMaskIdx)); + if (!DMaskConst) + return Op; + + AddrIdx = DMaskIdx + 1; + DMask = DMaskConst->getZExtValue(); + if (!DMask && !BaseOpcode->Store) { + // Eliminate no-op loads. Stores with dmask == 0 are *not* no-op: they + // store the channels' default values. + SDValue Undef = DAG.getUNDEF(Op.getValueType()); + if (isa(Op)) + return DAG.getMergeValues({ Undef, Op.getOperand(0) }, DL); + return Undef; + } + } + + unsigned NumVAddrs = BaseOpcode->NumExtraArgs + + (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + + (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + + (BaseOpcode->LodOrClampOrMip ? 1 : 0); + SmallVector VAddrs; + for (unsigned i = 0; i < NumVAddrs; ++i) + VAddrs.push_back(Op.getOperand(AddrIdx + i)); + SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs); + + SDValue True = DAG.getTargetConstant(1, DL, MVT::i32); + SDValue False = DAG.getTargetConstant(0, DL, MVT::i32); + unsigned CtrlIdx; // Index of texfailctrl argument + SDValue Unorm; + if (!BaseOpcode->Sampler) { + Unorm = True; + CtrlIdx = AddrIdx + NumVAddrs + 1; + } else { + auto UnormConst = + dyn_cast(Op.getOperand(AddrIdx + NumVAddrs + 2)); + if (!UnormConst) + return Op; + + Unorm = UnormConst->getZExtValue() ? 
True : False; + CtrlIdx = AddrIdx + NumVAddrs + 3; + } + + SDValue TexFail = Op.getOperand(CtrlIdx); + auto TexFailConst = dyn_cast(TexFail.getNode()); + if (!TexFailConst || TexFailConst->getZExtValue() != 0) + return Op; + + SDValue GLC; + SDValue SLC; + if (BaseOpcode->Atomic) { + GLC = True; // TODO no-return optimization + if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC)) + return Op; + } else { + if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC)) + return Op; + } + + SmallVector Ops; + if (BaseOpcode->Store || BaseOpcode->Atomic) + Ops.push_back(VData); // vdata + Ops.push_back(VAddr); + Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc + if (BaseOpcode->Sampler) + Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler + Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32)); + Ops.push_back(Unorm); + Ops.push_back(GLC); + Ops.push_back(SLC); + Ops.push_back(False); // r128 + Ops.push_back(False); // tfe + Ops.push_back(False); // lwe + Ops.push_back(DimInfo->DA ? True : False); + if (BaseOpcode->HasD16) + Ops.push_back(IsD16 ? True : False); + if (isa(Op)) + Ops.push_back(Op.getOperand(0)); // chain + + int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32; + int Opcode = -1; + + if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) + Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8, + NumVDataDwords, NumVAddrDwords); + if (Opcode == -1) + Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6, + NumVDataDwords, NumVAddrDwords); + assert(Opcode != -1); + + MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops); + if (auto MemOp = dyn_cast(Op)) { + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); + *MemRefs = MemOp->getMemOperand(); + NewNode->setMemRefs(MemRefs, MemRefs + 1); + } + + if (BaseOpcode->AtomicX2) { + SmallVector Elt; + DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1); + return DAG.getMergeValues({ Elt[0], SDValue(NewNode, 1) }, DL); + } else if (IsD16 && !BaseOpcode->Store) { + MVT LoadVT = Op.getSimpleValueType(); + SDValue Adjusted = + adjustLoadValueTypeImpl(SDValue(NewNode, 0), LoadVT, DL, DAG, + Subtarget->hasUnpackedD16VMem()); + return DAG.getMergeValues({ Adjusted, SDValue(NewNode, 1) }, DL); + } + + return SDValue(NewNode, 0); +} + SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -4738,6 +4985,10 @@ return SDValue(); } default: + if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = + AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) + return lowerImage(Op, ImageDimIntr, DAG); + return Op; } } @@ -5018,22 +5269,9 @@ return SDValue(); } default: - EVT LoadVT = Op.getValueType(); - if (LoadVT.getScalarSizeInBits() != 16) - return SDValue(); - - const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr = - AMDGPU::lookupD16ImageDimIntrinsic(IntrID); - if (D16ImageDimIntr) { - bool Unpacked = Subtarget->hasUnpackedD16VMem(); - MemSDNode *M = cast(Op); - - if (isTypeLegal(LoadVT) && (!Unpacked || LoadVT == MVT::f16)) - return SDValue(); - - return adjustLoadValueType(D16ImageDimIntr->D16HelperIntr, - M, DAG, true); - } + if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = + AMDGPU::getImageDimIntrinsicInfo(IntrID)) + return lowerImage(Op, ImageDimIntr, DAG); return SDValue(); } @@ -5288,25 +5526,9 @@ return SDValue(); } default: { - const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr = - 
-        AMDGPU::lookupD16ImageDimIntrinsic(IntrinsicID);
-    if (D16ImageDimIntr) {
-      SDValue VData = Op.getOperand(2);
-      EVT StoreVT = VData.getValueType();
-      if (((StoreVT == MVT::v2f16 || StoreVT == MVT::v4f16) &&
-           Subtarget->hasUnpackedD16VMem()) ||
-          !isTypeLegal(StoreVT)) {
-        SmallVector Ops(Op.getNode()->op_values());
-
-        Ops[1] = DAG.getConstant(D16ImageDimIntr->D16HelperIntr, DL, MVT::i32);
-        Ops[2] = handleD16VData(VData, DAG);
-
-        MemSDNode *M = cast<MemSDNode>(Op);
-        return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Op->getVTList(),
-                                       Ops, M->getMemoryVT(),
-                                       M->getMemOperand());
-      }
-    }
+    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+            AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
+      return lowerImage(Op, ImageDimIntr, DAG);
 
     return Op;
   }
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -157,6 +157,10 @@
 LLVM_READONLY
 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 
+LLVM_READONLY
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+                  unsigned VDataDwords, unsigned VAddrDwords);
+
 LLVM_READONLY
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -110,6 +110,13 @@
 #define GET_MIMGINFOTABLE_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+                  unsigned VDataDwords, unsigned VAddrDwords) {
+  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
+                                             VDataDwords, VAddrDwords);
+  return Info ? Info->Opcode : -1;
+}
+
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
   const MIMGInfo *NewInfo =
Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}getlod_1d:
+; GCN: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps <4 x float> @getlod_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %r
+}
+
+; GCN-LABEL: {{^}}getlod_2d:
+; GCN: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3{{$}}
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps <2 x float> @getlod_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %r = call <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <2 x float> %r
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_getlod_none_enabled:
+; GCN-NOT: image
+define amdgpu_ps <4 x float> @adjust_writemask_getlod_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.getlod.2d.v4f32.f32(i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %r
+}
+
+declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.getlod.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+attributes #0 = { nounwind readnone }
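
For reference, the generated tables are meant to be consumed together when lowering a dimension-aware image intrinsic. The sketch below mirrors the lookup sequence of SITargetLowering::lowerImage above; it is an illustration only and not part of the patch, and the helper name as well as the IntrinsicID, NumVDataDwords and IsGFX8Plus inputs are assumptions made for the example.

    // Illustration only -- not part of the patch. Shows how the tables
    // generated from ImageDimIntrinsicTable, MIMGBaseOpcodesTable,
    // MIMGDimInfoTable and the MIMG opcode table fit together.
    #include "AMDGPUInstrInfo.h"      // getImageDimIntrinsicInfo and friends
    #include "Utils/AMDGPUBaseInfo.h" // getMIMGOpcode
    using namespace llvm;

    static int selectMIMGOpcodeForImage(unsigned IntrinsicID,
                                        int NumVDataDwords, bool IsGFX8Plus) {
      const AMDGPU::ImageDimIntrinsicInfo *Intr =
          AMDGPU::getImageDimIntrinsicInfo(IntrinsicID);
      if (!Intr)
        return -1; // Not a dimension-aware image intrinsic.

      const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
          AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
      const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);

      // Raw number of address dwords, derived from the per-opcode and
      // per-dimension table entries exactly as in lowerImage.
      unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
                           (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
                           (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
                           (BaseOpcode->LodOrClampOrMip ? 1 : 0);

      // Round up to the vector widths used for the vaddr operand
      // (1, 2, 4, 8 or 16 dwords), matching getBuildDwordsVector.
      int NumVAddrDwords;
      if (NumVAddrs <= 1)
        NumVAddrDwords = 1;
      else if (NumVAddrs == 2)
        NumVAddrDwords = 2;
      else if (NumVAddrs <= 4)
        NumVAddrDwords = 4;
      else if (NumVAddrs <= 8)
        NumVAddrDwords = 8;
      else
        NumVAddrDwords = 16;

      // Prefer the gfx8 encoding when available and fall back to gfx6, as
      // lowerImage does; -1 means no instruction with this data/address width.
      int Opcode = -1;
      if (IsGFX8Plus)
        Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
                                       NumVDataDwords, NumVAddrDwords);
      if (Opcode == -1)
        Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6,
                                       NumVDataDwords, NumVAddrDwords);
      return Opcode;
    }

This is the same sequence the new lowerImage path uses, which is what allows the patch to delete the per-intrinsic ImageDimPattern definitions and the SI_image_d16helper_* pseudo-intrinsics in favor of a single table-driven lowering.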