Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -137,10 +137,8 @@ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool &Imm) const; bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; - bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; - bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferGLC(SDValue GLC, SDValue &Out) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; @@ -1254,20 +1252,16 @@ bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const { bool Imm; - return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; -} - -bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, - SDValue &Offset) const { - - if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) - return false; - bool Imm; if (!SelectSMRD(Addr, SBase, Offset, Imm)) return false; - return !Imm && isa(Offset); + // CI supports 32-bit literals, so once SelectSMRD has matched, a constant offset is accepted on SEA_ISLANDS regardless of Imm; every other case falls through to returning Imm. NOTE(review): the isa(Offset) check that follows appears to have lost its template argument in this copy of the patch - confirm against the original. 
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS && + isa(Offset)) + return true; + + return Imm; } bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, @@ -1280,19 +1274,16 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const { bool Imm; - return SelectSMRDOffset(Addr, true, Offset, Imm) && Imm; -} - -bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, - SDValue &Offset) const { - if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) - return false; - bool Imm; if (!SelectSMRDOffset(Addr, true, Offset, Imm)) return false; - return !Imm && isa(Offset); + // CI supports 32-bit literals, so once SelectSMRDOffset has matched, a constant offset is accepted on SEA_ISLANDS regardless of Imm; every other case falls through to returning Imm. + if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS && + isa(Offset)) + return true; + + return Imm; } bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, Index: lib/Target/AMDGPU/SMInstructions.td =================================================================== --- lib/Target/AMDGPU/SMInstructions.td +++ lib/Target/AMDGPU/SMInstructions.td @@ -234,11 +234,9 @@ }]>; def SMRDImm : ComplexPattern; -def SMRDImm32 : ComplexPattern; def SMRDSgpr : ComplexPattern; def SMRDSgprConst : ComplexPattern; def SMRDBufferImm : ComplexPattern; -def SMRDBufferImm32 : ComplexPattern; def SMRDBufferSgpr : ComplexPattern; def SMRDBufferGLC : ComplexPattern; @@ -299,7 +297,7 @@ defm : SMRD_LoadIntrinsicPat; // 1. 
Offset as an immediate -def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI +def : Pat < (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0) >; @@ -533,26 +531,3 @@ } def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>; - -let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in { - -class SMRD_Pattern_ci : Pat < - (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> { - let Predicates = [isCIOnly]; -} - -def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>; -def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>; - -def : Pat < - (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), - (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> { - let Predicates = [isCI]; // should this be isCIOnly? -} - -} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity - Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -33,9 +33,7 @@ ;CHECK-LABEL: {{^}}buffer_load_immoffs_large: ;SI: s_movk_i32 s4, 0x2000 ;SI: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 -;TODO: this should use SMEM: -;CI: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x2000 -;CI: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen +;CI: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x800 ;VI: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2000 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {