Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3660,12 +3660,17 @@
 
   // Legalize MIMG and MUBUF/MTBUF for shaders.
   //
-  // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
-  // scratch memory access. In both cases, the legalization never involves
-  // conversion to the addr64 form.
+  // Shaders only generate non-addr64 MUBUF/MTBUF instructions via intrinsics
+  // or via scratch memory access. In both cases, the legalization never
+  // involves conversion to the addr64 form.
+  // (Checking for a named operand "vaddr" is a way of checking whether the
+  // instruction is already addr64.)
+  // NOTE(review): offen/idxen/bothen MUBUF forms presumably carry a "vaddr"
+  // operand too; confirm they are not meant to be legalized here.
   if (isMIMG(MI) ||
       (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
-       (isMUBUF(MI) || isMTBUF(MI)))) {
+       (isMUBUF(MI) || isMTBUF(MI)) &&
+       !getNamedOperand(MI, AMDGPU::OpName::vaddr))) {
     MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
     if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
       unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
Index: test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SICI %s
+
+; Check that a load from an addrspace(1) constant array, indexed by the
+; non-inreg shader argument %arg18, is selected as an addr64 buffer load
+; with no readfirstlane.
+
+; GCN-LABEL: {{^}}main:
+; GCN-NOT: readfirstlane
+; SICI: buffer_load_dwordx4 {{.*}} addr64
+
+; The element values are arbitrary; the checks above only look at how the
+; load is selected.
+@indexable = internal unnamed_addr addrspace(1) constant [6 x <3 x float>] [<3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0.000000e+00>]
+
+define amdgpu_ps float @main(i32 %arg18) {
+.entry:
+  %tmp31 = sext i32 %arg18 to i64
+  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31
+  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
+  %tmp34 = extractelement <3 x float> %tmp33, i32 0
+  ret float %tmp34
+}
+