Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2538,12 +2538,23 @@ MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_IMPLICIT_DEF: { + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_LOAD: { + if (TypeIdx != 0) + return UnableToLegalize; Observer.changingInstr(MI); moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_STORE: + // Only the stored value (type index 0) can be widened here. + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -482,6 +482,7 @@ [](const LegalityQuery &Query) { return std::make_pair(0, LLT::scalar(32)); }) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .fewerElementsIf([=](const LegalityQuery &Query) { unsigned MemSize = Query.MMODescrs[0].SizeInBits; return (MemSize == 96) && Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc 
-mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s --- name: test_load_global_i32 @@ -390,3 +390,24 @@ %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, addrspace 1, align 16) $vgpr0_vgpr1_vgpr2 = COPY %1 ... + +--- +name: test_load_global_v3s8_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_v3s8_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>) + ; VI-LABEL: name: test_load_global_v3s8_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 1) + S_NOP 0, implicit %1 +... 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s --- name: test_store_global_i32 body: | @@ -227,6 +227,16 @@ bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI-LABEL: name: test_truncstore_global_s128_to_s16 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) + ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI-LABEL: name: test_truncstore_global_s128_to_s16 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) + ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store 2, addrspace 1) @@ -238,12 +248,12 @@ bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-LABEL: name: test_truncstore_global_s128_to_s16 + ; SI-LABEL: name: test_truncstore_global_s128_to_s8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[TRUNC:%[0-9]+]]:_(s32) = 
G_TRUNC [[COPY1]](s128) ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1) - ; VI-LABEL: name: test_truncstore_global_s128_to_s16 + ; VI-LABEL: name: test_truncstore_global_s128_to_s8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) @@ -371,3 +381,25 @@ G_STORE %1, %0 :: (store 16, addrspace 1) ... + +--- +name: test_store_global_v3s8_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; SI-LABEL: name: test_store_global_v3s8_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; SI: G_STORE %2:_(<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT %2:_(<4 x s8>), 0 + ; VI-LABEL: name: test_store_global_v3s8_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; VI: G_STORE %2:_(<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT %2:_(<4 x s8>), 0 + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<3 x s8>) = G_IMPLICIT_DEF + G_STORE %1, %0 :: (store 3, addrspace 1, align 4) + +...