Index: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -422,9 +422,11 @@ MachineMemOperand::MODereferenceable, 16, 4); unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; + const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); + unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset); BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) .addReg(Rsrc01) - .addImm(Offset) // offset + .addImm(EncodedOffset) // offset .addImm(0) // glc .addReg(ScratchRsrcReg, RegState::ImplicitDefine) .addMemOperand(MMO); Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefix=PAL --enable-var-scope %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tonga | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s ; PAL-NOT: .AMDGPU.config ; PAL-LABEL: {{^}}simple: @@ -55,11 +56,13 @@ ; Check code sequence for amdpal use of scratch for alloca in a compute shader. ; The scratch descriptor is loaded from offset 0x10 of the GIT, rather than offset ; 0 in a graphics shader. +; Prior to GCN3 s_load_dword offsets are dwords, so the offset will be 0x4. 
; PAL-LABEL: {{^}}scratch2_cs: ; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234 ; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0 -; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x10 +; CI: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x4 +; VI: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x10 ; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]: define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {