Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -304,7 +304,8 @@ llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem])>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; @@ -320,7 +321,7 @@ llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - []>; + [IntrWriteMem], "", [SDNPMemOperand]>; def int_amdgcn_image_store : AMDGPUImageStore; def int_amdgcn_image_store_mip : AMDGPUImageStore; @@ -336,7 +337,8 @@ llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem])>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; // Basic sample def int_amdgcn_image_sample : AMDGPUImageSample; @@ -428,7 +430,7 @@ llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic; def int_amdgcn_image_atomic_add : AMDGPUImageAtomic; @@ -451,7 +453,7 @@ llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; class AMDGPUBufferLoad : Intrinsic < [llvm_anyfloat_ty], Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -575,6 +575,157 @@ return true; } + + // Image load. + case Intrinsic::amdgcn_image_load: + case Intrinsic::amdgcn_image_load_mip: + + // Sample. + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + // Sample with comparison and offsets. + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + // Basic gather4 + case Intrinsic::amdgcn_image_gather4: + case Intrinsic::amdgcn_image_gather4_cl: + case Intrinsic::amdgcn_image_gather4_l: + case Intrinsic::amdgcn_image_gather4_b: + case Intrinsic::amdgcn_image_gather4_b_cl: + case Intrinsic::amdgcn_image_gather4_lz: + + // Gather4 with comparison + case Intrinsic::amdgcn_image_gather4_c: + case Intrinsic::amdgcn_image_gather4_c_cl: + case Intrinsic::amdgcn_image_gather4_c_l: + case Intrinsic::amdgcn_image_gather4_c_b: + case Intrinsic::amdgcn_image_gather4_c_b_cl: + case Intrinsic::amdgcn_image_gather4_c_lz: + + // Gather4 with offsets + case Intrinsic::amdgcn_image_gather4_o: + case Intrinsic::amdgcn_image_gather4_cl_o: + case Intrinsic::amdgcn_image_gather4_l_o: + case Intrinsic::amdgcn_image_gather4_b_o: + case Intrinsic::amdgcn_image_gather4_b_cl_o: + case Intrinsic::amdgcn_image_gather4_lz_o: + + // Gather4 with comparison and offsets + case Intrinsic::amdgcn_image_gather4_c_o: + case Intrinsic::amdgcn_image_gather4_c_cl_o: + case Intrinsic::amdgcn_image_gather4_c_l_o: + case Intrinsic::amdgcn_image_gather4_c_b_o: + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + case Intrinsic::amdgcn_image_gather4_c_lz_o: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(1)); + Info.align = 0; + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; + return true; + } + case Intrinsic::amdgcn_image_store: + case Intrinsic::amdgcn_image_store_mip: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(2)); + Info.flags = MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + Info.align = 0; + return true; + } + case Intrinsic::amdgcn_image_atomic_swap: + case Intrinsic::amdgcn_image_atomic_add: + case Intrinsic::amdgcn_image_atomic_sub: + case Intrinsic::amdgcn_image_atomic_smin: + case Intrinsic::amdgcn_image_atomic_umin: + case Intrinsic::amdgcn_image_atomic_smax: + case Intrinsic::amdgcn_image_atomic_umax: + case Intrinsic::amdgcn_image_atomic_and: + case Intrinsic::amdgcn_image_atomic_or: + case Intrinsic::amdgcn_image_atomic_xor: + case Intrinsic::amdgcn_image_atomic_inc: + case Intrinsic::amdgcn_image_atomic_dec: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(2)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_image_atomic_cmpswap: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(3)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } default: return false; } @@ -2946,24 +3097,12 @@ SIMachineFunctionInfo *MFI = MF->getInfo(); if (TII->isMIMG(MI)) { - if (!MI.memoperands_empty()) - return BB; + if (MI.memoperands_empty() && MI.mayLoadOrStore()) { + report_fatal_error("missing mem operand from MIMG instruction"); + } // Add a memoperand for mimg instructions so that they aren't assumed to // be ordered memory instuctions. - MachinePointerInfo PtrInfo(MFI->getImagePSV()); - MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable; - if (MI.mayStore()) - Flags |= MachineMemOperand::MOStore; - - if (MI.mayLoad()) - Flags |= MachineMemOperand::MOLoad; - - if (Flags != MachineMemOperand::MODereferenceable) { - auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0); - MI.addMemOperand(*MF, MMO); - } - return BB; } Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -34,12 +34,14 @@ class MachineFrameInfo; class MachineFunction; +class SIInstrInfo; class TargetRegisterClass; class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { public: + // TODO: Is the img rsrc useful? explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : - PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } + PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -136,7 +138,10 @@ std::array DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; AMDGPUBufferPseudoSourceValue BufferPSV; - AMDGPUImagePseudoSourceValue ImagePSV; + + DenseMap> ImagePSVs; + private: unsigned LDSWaveSpillSize = 0; @@ -629,12 +634,18 @@ return LDSWaveSpillSize; } + // FIXME: These should be unique const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { return &BufferPSV; } - const AMDGPUImagePseudoSourceValue *getImagePSV() const { - return &ImagePSV; + const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, + const Value *ImgRsrc) { + assert(ImgRsrc); + auto PSV = ImagePSVs.try_emplace( + ImgRsrc, + llvm::make_unique(TII)); + return PSV.first->second.get(); } }; Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,7 +29,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), BufferPSV(*(MF.getSubtarget().getInstrInfo())), - ImagePSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false),