Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -422,21 +422,24 @@
   }
   case TargetOpcode::G_LOAD: {
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+    LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
+    unsigned PtrSize = PtrTy.getSizeInBits();
+    unsigned AS = PtrTy.getAddressSpace();
     LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
-    // FIXME: Should we be hard coding the size for these mappings?
-    if (isInstrUniform(MI)) {
+    if (isInstrUniform(MI) &&
+        (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
       const InstructionMapping &SSMapping = getInstructionMapping(
           1, 1, getOperandsMapping(
                     {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
-                     AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+                     AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
           2); // Num Operands
       AltMappings.push_back(&SSMapping);
     }
 
     const InstructionMapping &VVMapping = getInstructionMapping(
         2, 1, getOperandsMapping(
-                  {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
-                   AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
+                  {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
+                   AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
         2); // Num Operands
     AltMappings.push_back(&VVMapping);
@@ -1471,18 +1474,21 @@
   SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
   unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
   LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
-  unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+  Register PtrReg = MI.getOperand(1).getReg();
+  LLT PtrTy = MRI.getType(PtrReg);
+  unsigned AS = PtrTy.getAddressSpace();
+  unsigned PtrSize = PtrTy.getSizeInBits();
 
   const ValueMapping *ValMapping;
   const ValueMapping *PtrMapping;
 
-  if (isInstrUniform(MI)) {
+  if (isInstrUniform(MI) &&
+      (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
     // We have a uniform instruction so we want to use an SMRD load
     ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
     PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
   } else {
     ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
-    // FIXME: What would happen if we used SGPRRegBankID here?
     PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
   }
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -1,8 +1,6 @@
 # RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
 # RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
-# REQUIRES: global-isel
-
 --- |
   define amdgpu_kernel void @load_global_v8i32_non_uniform(<8 x i32> addrspace(1)* %in) {
     %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -56,10 +54,14 @@
     %tmp2 = load <8 x i64>, <8 x i64> addrspace(4)* %constant.not.uniform.v8i64
     ret void
   }
+
   define amdgpu_kernel void @load_constant_v8i32_uniform() {ret void}
   define amdgpu_kernel void @load_constant_v4i64_uniform() {ret void}
   define amdgpu_kernel void @load_constant_v16i32_uniform() {ret void}
   define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void}
+  define amdgpu_kernel void @load_local_uniform() { ret void }
+  define amdgpu_kernel void @load_region_uniform() { ret void }
+
   declare i32 @llvm.amdgcn.workitem.id.x() #0
   attributes #0 = { nounwind readnone }
 ...
@@ -486,3 +488,34 @@
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<8 x s64>) = G_LOAD %0 :: (load 64, addrspace 4)
 ...
+
+---
+name: load_local_uniform
+legalized: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: load_local_uniform
+    ; CHECK: %0:sgpr(p3) = COPY $sgpr0
+    ; CHECK: %2:vgpr(p3) = COPY %0(p3)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 3)
+
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 3)
+
+...
+---
+name: load_region_uniform
+legalized: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: load_region_uniform
+    ; CHECK: %0:sgpr(p3) = COPY $sgpr0
+    ; CHECK: %2:vgpr(p3) = COPY %0(p3)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 5)
+
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5)
+
+...