Index: lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -253,6 +253,10 @@
   for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
     int64_t Offset = Offsets[i];
     SDNode *Load = O2SMap[Offset];
+    // If BaseLoad depends on Load, BaseLoad cannot be scheduled ahead of
+    // Load, so skip this candidate and keep searching for others.
+    if (Load->isOperandOf(BaseLoad))
+      continue;
     if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
       break; // Stop right here. Ignore loads that are further away.
     Loads.push_back(Load);
Index: test/CodeGen/AMDGPU/cluster-neighboring-loads.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/cluster-neighboring-loads.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}cluster_neighboring_loads:
+; GCN: buffer_load_ushort [[DST:v[0-9]+]], off, [[RSRC:s\[[0-9]+:[0-9]+\]]], [[SOFF:s[0-9]+]] offset:2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_short_d16_hi [[DST]], off, [[RSRC]], [[SOFF]]
+
+define amdgpu_kernel void @cluster_neighboring_loads() {
+bb:
+  %loads = load <2 x half>, <2 x half> addrspace(5)* null, align 2
+  %shuffled = shufflevector <2 x half> %loads, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+  br label %bb1
+
+bb1:
+  call void asm sideeffect "; use $0", "v"(<2 x half> %shuffled)
+  br label %bb1
+}