diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1991,6 +1991,10 @@ SBase = Expand32BitAddress(N0); return true; } + if (SelectSMRDOffset(N0, Offset, Imm, Imm32Only)) { + SBase = Expand32BitAddress(N1); + return true; + } } return false; } diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll --- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -1,15 +1,16 @@ -; Test that DAG->DAG ISel is able to pick up the S_LOAD_DWORDX4_SGPR instruction that fetches the offset -; from a register. - -; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -global-isel -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GISEL %s +; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,SDAG %s +; RUN: llc -march=amdgcn -global-isel -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,GISEL %s -; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer -; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4) +@0 = external dso_local addrspace(4) constant [4 x <2 x float>] +@1 = external dso_local addrspace(4) constant i32 +; Test that DAG->DAG ISel is able to pick up the S_LOAD_DWORDX4_SGPR instruction that fetches the offset +; from a register. +; GCN-LABEL: name: test_load_zext +; SDAG: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer +; SDAG: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4) ; GISEL: $[[OFFSET:.*]] = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer ; GISEL: S_LOAD_DWORDX4_SGPR killed renamable {{.*}}, killed renamable $[[OFFSET]], 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4) - define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 { .entry: %5 = call i64 @llvm.amdgcn.s.getpc() #3 @@ -27,6 +28,21 @@ ret void } +; Make sure we match complex register offsets, which may come before the +; base operand in the load's SDAG nodes. +; GCN-LABEL: name: test_complex_reg_offset +; CCN: S_LOAD_DWORD_IMM +; GCN: S_LOAD_DWORD_SGPR +define amdgpu_ps void @test_complex_reg_offset(float addrspace(1)* %out) { + %i = load i32, i32 addrspace(4)* @1 + %i1 = and i32 %i, 3 + %i2 = zext i32 %i1 to i64 + %i3 = getelementptr [4 x <2 x float>], [4 x <2 x float>] addrspace(4)* @0, i64 0, i64 %i2, i64 0 + %i4 = load float, float addrspace(4)* %i3, align 4 + store float %i4, float addrspace(1)* %out + ret void +} + declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1 ; Function Attrs: nounwind readnone speculatable