diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1772,27 +1772,42 @@ } } - assert(EltSize == 32 && "unhandled elt size"); + if (EltSize == 32) { + static const int16_t Sub0_31_256[] = { + AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, + AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, + AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, + AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + }; - static const int16_t Sub0_31_256[] = { - AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, - AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, - AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - }; + static const int16_t Sub0_15_256[] = { + AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, + AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + }; + + switch (AMDGPU::getRegBitWidth(*RC->MC)) { + case 256: + return {}; + case 512: + return makeArrayRef(Sub0_15_256); + case 1024: + return makeArrayRef(Sub0_31_256); + default: + llvm_unreachable("unhandled register size"); + } + } - static const int16_t Sub0_15_256[] = { - AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + assert(EltSize == 64 && "unhandled elt size"); + static const int16_t Sub0_31_512[] = { + AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, + AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 }; switch (AMDGPU::getRegBitWidth(*RC->MC)) { - case 256: - return {}; case 512: - return makeArrayRef(Sub0_15_256); + return {}; case 1024: - return makeArrayRef(Sub0_31_256); + return makeArrayRef(Sub0_31_512); default: llvm_unreachable("unhandled register size"); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir @@ -587,6 +587,26 @@ ... --- +name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; GCN-LABEL: name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32 + ; GCN: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN: [[COPY1:%[0-9]+]]:sreg_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]] + %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(<16 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %2:sgpr(<32 x s32>) = G_CONCAT_VECTORS %0, %1 + $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2 +... + +--- name: test_concat_vectors_s_v4s64_s_v2s64_s_v2s64 legalized: true regBankSelected: true