AMDGPU/GlobalISel: do not give uniform sub-dword loads the scalar mapping.

isInstrUniform is renamed to isInstrUniformNonExtLoad and additionally
requires the single memory operand to be at least 4 bytes wide. 1- and 2-byte
loads therefore no longer take the "use an SMRD load" branch and get the VGPR
mapping that the new regbankselect tests check. The global-address-space
tests use a p1 pointer so that the address space read from the pointer type
matches the addrspace on the memory operand.

Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -319,12 +319,12 @@
   }
 }
 
-static bool isInstrUniform(const MachineInstr &MI) {
+static bool isInstrUniformNonExtLoad(const MachineInstr &MI) {
   if (!MI.hasOneMemOperand())
     return false;
 
   const MachineMemOperand *MMO = *MI.memoperands_begin();
-  return AMDGPUInstrInfo::isUniformMMO(MMO);
+  return MMO->getSize() >= 4 && AMDGPUInstrInfo::isUniformMMO(MMO);
 }
 
 RegisterBankInfo::InstructionMappings
@@ -425,7 +425,7 @@
     unsigned PtrSize = PtrTy.getSizeInBits();
     unsigned AS = PtrTy.getAddressSpace();
     LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
-    if (isInstrUniform(MI) &&
+    if (isInstrUniformNonExtLoad(MI) &&
         (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
       const InstructionMapping &SSMapping = getInstructionMapping(
           1, 1, getOperandsMapping(
@@ -1481,7 +1481,7 @@
   const ValueMapping *ValMapping;
   const ValueMapping *PtrMapping;
 
-  if (isInstrUniform(MI) &&
+  if (isInstrUniformNonExtLoad(MI) &&
       (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
     // We have a uniform instruction so we want to use an SMRD load
     ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -61,6 +61,10 @@
   define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void}
   define amdgpu_kernel void @load_local_uniform() { ret void }
   define amdgpu_kernel void @load_region_uniform() { ret void }
+  define amdgpu_kernel void @extload_constant_i8_to_i32_uniform() { ret void }
+  define amdgpu_kernel void @extload_global_i8_to_i32_uniform() { ret void }
+  define amdgpu_kernel void @extload_constant_i16_to_i32_uniform() { ret void }
+  define amdgpu_kernel void @extload_global_i16_to_i32_uniform() { ret void }
 
   declare i32 @llvm.amdgcn.workitem.id.x() #0
   attributes #0 = { nounwind readnone }
@@ -519,3 +523,66 @@
     %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5)
 
 ...
+
+---
+name : extload_constant_i8_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform
+    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 4)
+    %0:_(p4) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 4, align 1)
+...
+
+---
+name : extload_global_i8_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform{{$}}
+    ; CHECK: %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p1) = COPY %0(p1)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p1) :: (load 1, addrspace 1)
+    %0:_(p1) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 1, align 1)
+...
+
+---
+name : extload_constant_i16_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform
+    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 4)
+
+    %0:_(p4) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 4, align 2)
+...
+
+---
+name : extload_global_i16_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform
+    ; CHECK: %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p1) = COPY %0(p1)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p1) :: (load 2, addrspace 1)
+
+    %0:_(p1) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2)
+...