Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2785,7 +2785,11 @@
     //
     LLVM_FALLTHROUGH;
   case AMDGPUAS::GLOBAL_ADDRESS: {
-    if (isMemOpUniform(Load) && isMemOpHasNoClobberedMemOperand(Load))
+    // Skip custom lowering only when scalarization of global loads is enabled
+    // on this subtarget; otherwise even a uniform, unclobbered load falls
+    // through to the legalization below, like any other global load.
+    if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
+        isMemOpHasNoClobberedMemOperand(Load))
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they
     // have the same legalization requirements as global and private
Index: test/CodeGen/AMDGPU/mesa_regression.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/mesa_regression.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O2 -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=false -verify-machineinstrs < %s | FileCheck %s
+
+; With -amdgpu-scalarize-global-loads=false, a wide <16 x double> global load
+; through a kernel-argument pointer must still be emitted as flat_load_dwordx4.
+
+; CHECK-LABEL: {{^}}store_global:
+; CHECK: flat_load_dwordx4
+
+define amdgpu_kernel void @store_global(<16 x double> addrspace(1)* nocapture %out, <16 x double> addrspace(1)* nocapture readonly %in) {
+entry:
+  %tmp = load <16 x double>, <16 x double> addrspace(1)* %in
+  store <16 x double> %tmp, <16 x double> addrspace(1)* %out
+  ret void
+}