Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2538,7 +2538,8 @@ MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_IMPLICIT_DEF: { + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_LOAD: { Observer.changingInstr(MI); moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -482,6 +482,7 @@ [](const LegalityQuery &Query) { return std::make_pair(0, LLT::scalar(32)); }) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .fewerElementsIf([=](const LegalityQuery &Query) { unsigned MemSize = Query.MMODescrs[0].SizeInBits; return (MemSize == 96) && Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s --- name: test_load_global_i32 @@ -390,3 +390,24 @@ %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, addrspace 1, align 16) $vgpr0_vgpr1_vgpr2 = COPY %1 ... + +--- +name: test_load_global_v3s8_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_v3s8_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>) + ; VI-LABEL: name: test_load_global_v3s8_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 1) + S_NOP 0, implicit %1 +...