Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1394,6 +1394,14 @@ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5; if (VecSize <= 256) // 32 bytes return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8; + if (VecSize <= 288) // 36 bytes + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9; + if (VecSize <= 320) // 40 bytes + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10; + if (VecSize <= 352) // 44 bytes + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11; + if (VecSize <= 384) // 48 bytes + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12; if (VecSize <= 512) // 64 bytes return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16; if (VecSize <= 1024) // 128 bytes @@ -2084,6 +2092,10 @@ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4: case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5: case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12: case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16: case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32: case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1: Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -729,6 +729,10 @@ def S_INDIRECT_REG_WRITE_MOVREL_B32_V4 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; def S_INDIRECT_REG_WRITE_MOVREL_B32_V5 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; def S_INDIRECT_REG_WRITE_MOVREL_B32_V8 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; +def S_INDIRECT_REG_WRITE_MOVREL_B32_V9 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; +def S_INDIRECT_REG_WRITE_MOVREL_B32_V10 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; +def S_INDIRECT_REG_WRITE_MOVREL_B32_V11 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; +def S_INDIRECT_REG_WRITE_MOVREL_B32_V12 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; def S_INDIRECT_REG_WRITE_MOVREL_B32_V16 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; def S_INDIRECT_REG_WRITE_MOVREL_B32_V32 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -174,6 +174,142 @@ S_ENDPGM 0, implicit %3 ... +--- +name: insert_vector_elt_s_s32_v9s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8, $sgpr9, $sgpr10 + + ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v9s32 + ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8, $sgpr9, $sgpr10 + ; MOVREL-NEXT: {{ $}} + ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_288 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 + ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 + ; MOVREL-NEXT: $m0 = COPY [[COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V9_:%[0-9]+]]:sgpr_288 = S_INDIRECT_REG_WRITE_MOVREL_B32_V9 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V9_]] + ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v9s32 + ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8, $sgpr9, $sgpr10 + ; GPRIDX-NEXT: {{ $}} + ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_288 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 + ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 + ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V9_:%[0-9]+]]:sgpr_288 = S_INDIRECT_REG_WRITE_MOVREL_B32_V9 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V9_]] + %0:sgpr(<9 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 + %1:sgpr(s32) = COPY $sgpr9 + %2:sgpr(s32) = COPY $sgpr10 + %3:sgpr(<9 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: insert_vector_elt_s_s32_v10s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, $sgpr10, $sgpr11 + + ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v10s32 + ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, $sgpr10, $sgpr11 + ; MOVREL-NEXT: {{ $}} + ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_320 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr10 + ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr11 + ; MOVREL-NEXT: $m0 = COPY [[COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V10_:%[0-9]+]]:sgpr_320 = S_INDIRECT_REG_WRITE_MOVREL_B32_V10 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V10_]] + ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v10s32 + ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, $sgpr10, $sgpr11 + ; GPRIDX-NEXT: {{ $}} + ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_320 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr10 + ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr11 + ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V10_:%[0-9]+]]:sgpr_320 = S_INDIRECT_REG_WRITE_MOVREL_B32_V10 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V10_]] + %0:sgpr(<10 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + %1:sgpr(s32) = COPY $sgpr10 + %2:sgpr(s32) = COPY $sgpr11 + %3:sgpr(<10 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: insert_vector_elt_s_s32_v11s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, $sgpr11, $sgpr12 + + ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v11s32 + ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, $sgpr11, $sgpr12 + ; MOVREL-NEXT: {{ $}} + ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_352 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr11 + ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr12 + ; MOVREL-NEXT: $m0 = COPY [[COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V11_:%[0-9]+]]:sgpr_352 = S_INDIRECT_REG_WRITE_MOVREL_B32_V11 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V11_]] + ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v11s32 + ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, $sgpr11, $sgpr12 + ; GPRIDX-NEXT: {{ $}} + ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_352 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr11 + ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr12 + ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V11_:%[0-9]+]]:sgpr_352 = S_INDIRECT_REG_WRITE_MOVREL_B32_V11 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V11_]] + %0:sgpr(<11 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + %1:sgpr(s32) = COPY $sgpr11 + %2:sgpr(s32) = COPY $sgpr12 + %3:sgpr(<11 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: insert_vector_elt_s_s32_v12s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr12, $sgpr13 + + ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v12s32 + ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr12, $sgpr13 + ; MOVREL-NEXT: {{ $}} + ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_384 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr12 + ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr13 + ; MOVREL-NEXT: $m0 = COPY [[COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V12_:%[0-9]+]]:sgpr_384 = S_INDIRECT_REG_WRITE_MOVREL_B32_V12 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V12_]] + ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v12s32 + ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr12, $sgpr13 + ; GPRIDX-NEXT: {{ $}} + ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_384 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr12 + ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr13 + ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V12_:%[0-9]+]]:sgpr_384 = S_INDIRECT_REG_WRITE_MOVREL_B32_V12 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V12_]] + %0:sgpr(<12 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + %1:sgpr(s32) = COPY $sgpr12 + %2:sgpr(s32) = COPY $sgpr13 + %3:sgpr(<12 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + --- name: insert_vector_elt_s_s32_v16s32 legalized: true