Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -145,10 +145,8 @@ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool &Imm) const; bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; - bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; - bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; @@ -1343,7 +1341,8 @@ if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) return false; - if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { + if (Gen == AMDGPUSubtarget::SEA_ISLANDS && + ByteOffset % 4 == 0 && isUInt<32>(EncodedOffset)) { // 32-bit Immediates are supported on Sea Islands. 
Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); } else { @@ -1376,20 +1375,15 @@ bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const { bool Imm; - return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; -} - -bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, - SDValue &Offset) const { - - if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) - return false; - - bool Imm; if (!SelectSMRD(Addr, SBase, Offset, Imm)) return false; - return !Imm && isa<ConstantSDNode>(Offset); + if (Subtarget->has32BitLiteralSMRDOffset() && + isa<ConstantSDNode>(Offset)) + return true; + + return Imm; } bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, @@ -1402,19 +1396,15 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const { bool Imm; - return SelectSMRDOffset(Addr, Offset, Imm) && Imm; -} - -bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, - SDValue &Offset) const { - if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) - return false; - bool Imm; if (!SelectSMRDOffset(Addr, Offset, Imm)) return false; - return !Imm && isa<ConstantSDNode>(Offset); + if (Subtarget->has32BitLiteralSMRDOffset() && + isa<ConstantSDNode>(Offset)) + return true; + + return Imm; } bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -399,6 +399,10 @@ return FlatScratchInsts; } + bool has32BitLiteralSMRDOffset() const { + return getGeneration() == SEA_ISLANDS; + } + bool isMesaKernel(const MachineFunction &MF) const { return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); } Index: lib/Target/AMDGPU/SMInstructions.td =================================================================== --- lib/Target/AMDGPU/SMInstructions.td +++ lib/Target/AMDGPU/SMInstructions.td @@ -234,10 +234,8 @@ }]>; def 
SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">; -def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">; def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">; def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">; -def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">; def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">; let Predicates = [isGCN] in { @@ -276,7 +274,7 @@ defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; // 1. Offset as an immediate -def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI +def : Pat < (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0) >; @@ -504,26 +502,3 @@ } def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>; - -let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in { - -class SMRD_Pattern_ci <string Instr, ValueType vt> : Pat < - (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> { - let Predicates = [isCIOnly]; -} - -def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>; - -def : Pat < - (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), - (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> { - let Predicates = [isCI]; // should this be isCIOnly? -} - -} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity - Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -752,7 +752,7 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); - return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : + return isSI(ST) || isCI(ST) ? ByteOffset % 4 == 0 && isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); } } // end namespace AMDGPU