diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1388,12 +1388,16 @@ // in 8-bits, it can use a smaller encoding. if (!isUInt<32>(AM.BaseOffs / 4)) return false; - } else if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + } else if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX9) { // On VI, these use the SMEM format and the offset is 20-bit in bytes. if (!isUInt<20>(AM.BaseOffs)) return false; - } else - llvm_unreachable("unhandled generation"); + } else { + // On GFX9 the offset is signed 21-bit in bytes (but must not be negative + // for S_BUFFER_* instructions). + if (!isInt<21>(AM.BaseOffs)) + return false; + } if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg. return true; diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll @@ -167,19 +167,31 @@ } define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) { -; GFX6789-LABEL: test_sink_smem_offset_neg400: -; GFX6789: ; %bb.0: ; %entry -; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70 -; GFX6789-NEXT: s_addc_u32 s1, s1, -1 -; GFX6789-NEXT: .LBB4_1: ; %loop -; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX6789-NEXT: s_waitcnt lgkmcnt(0) -; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0 -; GFX6789-NEXT: s_add_i32 s2, s2, -1 -; GFX6789-NEXT: s_cmp_lg_u32 s2, 0 -; GFX6789-NEXT: s_cbranch_scc1 .LBB4_1 -; GFX6789-NEXT: ; %bb.2: ; %end -; GFX6789-NEXT: s_endpgm +; GFX678-LABEL: test_sink_smem_offset_neg400: +; GFX678: ; %bb.0: ; %entry +; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70 +; GFX678-NEXT: s_addc_u32 s1, s1, -1 +; GFX678-NEXT: .LBB4_1: ; %loop +; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX678-NEXT: s_waitcnt lgkmcnt(0) +; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0 +; GFX678-NEXT: s_add_i32 s2, s2, -1 +; GFX678-NEXT: s_cmp_lg_u32 s2, 0 +; GFX678-NEXT: s_cbranch_scc1 .LBB4_1 +; GFX678-NEXT: ; %bb.2: ; %end +; GFX678-NEXT: s_endpgm +; +; GFX9-LABEL: test_sink_smem_offset_neg400: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: .LBB4_1: ; %loop +; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s3, s[0:1], -0x190 +; GFX9-NEXT: s_add_i32 s2, s2, -1 +; GFX9-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9-NEXT: s_cbranch_scc1 .LBB4_1 +; GFX9-NEXT: ; %bb.2: ; %end +; GFX9-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 -400 br label %loop @@ -194,6 +206,3 @@ end: ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX678: {{.*}} -; GFX9: {{.*}}