Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -877,13 +877,27 @@
       Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
 }
 
+static EVT memVTFromImageData(Type *Ty, unsigned DMaskLanes) {
+  assert(DMaskLanes != 0);
+
+  if (auto *VT = dyn_cast<VectorType>(Ty)) {
+    unsigned NumElts = std::min(DMaskLanes,
+                                static_cast<unsigned>(VT->getNumElements()));
+    return EVT::getVectorVT(Ty->getContext(),
+                            EVT::getEVT(VT->getElementType()),
+                            NumElts);
+  }
+
+  return EVT::getEVT(Ty);
+}
+
 // Peek through TFE struct returns to only use the data size.
-static EVT memVTFromImageReturn(StructType &Ty) {
+static EVT memVTFromImageReturn(StructType &Ty, unsigned DMaskLanes) {
   // Only limited forms of aggregate type currently expected.
   if (Ty.getNumContainedTypes() != 2 ||
       !Ty.getContainedType(1)->isIntegerTy(32))
     return EVT();
-  return EVT::getEVT(Ty.getContainedType(0));
+  return memVTFromImageData(Ty.getContainedType(0), DMaskLanes);
 }
 
 bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -912,19 +926,46 @@
     Info.flags = MachineMemOperand::MODereferenceable;
     if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
-      Info.opc = ISD::INTRINSIC_W_CHAIN;
-      // TODO: Account for dmask reducing loaded size.
-      if (auto *ST = dyn_cast<StructType>(CI.getType())) {
-        // Some intrinsics return an aggregate type - special case to work out
-        // the correct memVT
-        Info.memVT = memVTFromImageReturn(*ST);
+      unsigned DMaskLanes = 4;
+
+      if (RsrcIntr->IsImage) {
+        const AMDGPU::ImageDimIntrinsicInfo *Intr
+          = AMDGPU::getImageDimIntrinsicInfo(IntrID);
+        const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+            AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+
+        if (!BaseOpcode->Gather4) {
+          // If this isn't a gather, we may have excess loaded elements in the
+          // IR type. Check the dmask for the real number of elements loaded.
+
+          unsigned DMask
+            = cast<ConstantInt>(CI.getArgOperand(0))->getZExtValue();
+          DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
+        }
+
+        if (auto *ST = dyn_cast<StructType>(CI.getType())) {
+          // Some intrinsics return an aggregate type - special case to work out
+          // the correct memVT
+          Info.memVT = memVTFromImageReturn(*ST, DMaskLanes);
+        } else
+          Info.memVT = memVTFromImageData(CI.getType(), DMaskLanes);
       } else
-        Info.memVT = MVT::getVT(CI.getType(), true);
+        Info.memVT = EVT::getEVT(CI.getType());
+      // FIXME: What does alignment mean for an image?
+      Info.opc = ISD::INTRINSIC_W_CHAIN;
       Info.flags |= MachineMemOperand::MOLoad;
     } else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
       Info.opc = ISD::INTRINSIC_VOID;
-      Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+
+      Type *DataTy = CI.getArgOperand(0)->getType();
+      if (RsrcIntr->IsImage) {
+        unsigned DMask = cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue();
+        unsigned DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
+        Info.memVT = memVTFromImageData(DataTy, DMaskLanes);
+      } else
+        Info.memVT = EVT::getEVT(DataTy);
+
       Info.flags |= MachineMemOperand::MOStore;
     } else {
      // Atomic
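
For context, the dmask handling above narrows the reported memory type to the number of enabled image components. Below is a minimal standalone sketch of that computation; the helper names (popCount, dmaskLanes, loadedElements) are hypothetical, plain unsigned arithmetic stands in for LLVM's EVT machinery, and a hand-rolled popcount replaces llvm::countPopulation.

#include <algorithm>
#include <cassert>
#include <cstdio>

// Hypothetical stand-in for llvm::countPopulation: count of set bits.
static unsigned popCount(unsigned X) {
  unsigned N = 0;
  for (; X != 0; X &= X - 1)
    ++N;
  return N;
}

// Mirrors the patch's rule: a dmask of 0 still loads one element (only the
// TFE/LWE status would be used); otherwise each set bit enables one
// loaded component.
static unsigned dmaskLanes(unsigned DMask) {
  return DMask == 0 ? 1 : popCount(DMask);
}

// Mirrors memVTFromImageData for a vector return: the loaded width is the
// smaller of the dmask lane count and the IR vector's element count.
static unsigned loadedElements(unsigned DMask, unsigned IRVectorElts) {
  return std::min(dmaskLanes(DMask), IRVectorElts);
}

int main() {
  // An image load declared as <4 x float> but with dmask = 0b0101 only
  // reads two components, so the memory type narrows to v2f32.
  assert(loadedElements(0b0101, 4) == 2);
  // dmask = 0 still performs a one-element load.
  assert(loadedElements(0, 4) == 1);
  std::printf("dmask 0b0101 -> %u loaded elements\n",
              loadedElements(0b0101, 4));
  return 0;
}

Note that gathers are deliberately excluded from this narrowing in the patch: for gather4 the dmask selects which single channel is gathered from four texels, not how many components are loaded, so the result is always four elements wide.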