Index: lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- lib/Target/AMDGPU/MIMGInstructions.td +++ lib/Target/AMDGPU/MIMGInstructions.td @@ -25,6 +25,7 @@ let DecoderNamespace = dns; let isAsmParserOnly = !if(!eq(dns,""), 1, 0); let AsmMatchConverter = "cvtMIMG"; + let usesCustomInserter = 1; } class MIMG_NoSampler_Helper op, string asm, Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1677,9 +1677,32 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { + + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + MachineFunction *MF = BB->getParent(); + SIMachineFunctionInfo *MFI = MF->getInfo(); + + if (TII->isMIMG(MI)) { + if (!MI.memoperands_empty()) + return BB; + // Add a memoperand for mimg instructions so that they aren't assumed to + // be ordered memory instructions. 
+ + MachinePointerInfo PtrInfo(MFI->getImagePSV()); + MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable; + if (MI.mayStore()) + Flags |= MachineMemOperand::MOStore; + + if (MI.mayLoad()) + Flags |= MachineMemOperand::MOLoad; + + auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0); + MI.addMemOperand(*MF, MMO); + return BB; + } + switch (MI.getOpcode()) { case AMDGPU::SI_INIT_M0: { - const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addOperand(MI.getOperand(0)); @@ -1687,10 +1710,6 @@ return BB; } case AMDGPU::GET_GROUPSTATICSIZE: { - const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); - - MachineFunction *MF = BB->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo(); DebugLoc DL = MI.getDebugLoc(); BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32)) .addOperand(MI.getOperand(0)) @@ -1714,7 +1733,6 @@ return splitKillBlock(MI, BB); case AMDGPU::V_CNDMASK_B64_PSEUDO: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); unsigned Dst = MI.getOperand(0).getReg(); unsigned Src0 = MI.getOperand(1).getReg(); Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -23,6 +23,31 @@ class MachineRegisterInfo; +class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { +public: + explicit AMDGPUImagePseudoSourceValue() : + PseudoSourceValue(PseudoSourceValue::TargetCustom) { } + + virtual bool isConstant(const MachineFrameInfo *) const { + // This should probably be true for most images, but we will start by being + // conservative. + return false; + } + + virtual bool isAliased(const MachineFrameInfo *) const { + // FIXME: If we ever change image intrinsics to accept fat pointers, then + // this could be true for some cases. 
+ return false; + } + + virtual bool mayAlias(const MachineFrameInfo*) const { + // FIXME: If we ever change image intrinsics to accept fat pointers, then + // this could be true for some cases. + return false; + } +}; + + /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo final : public AMDGPUMachineFunction { @@ -73,6 +98,8 @@ // Stack object indices for work item IDs. std::array DebuggerWorkItemIDStackObjectIndices; + std::unique_ptr ImagePSV; + public: // FIXME: Make private unsigned LDSWaveSpillSize; @@ -434,6 +461,10 @@ } llvm_unreachable("unexpected dimension"); } + + AMDGPUImagePseudoSourceValue *getImagePSV() { + return ImagePSV.get(); + } }; } // End namespace llvm Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -52,6 +52,7 @@ WavesPerEU(0, 0), DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}), DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}), + ImagePSV(llvm::make_unique()), LDSWaveSpillSize(0), PSInputEna(0), NumUserSGPRs(0),