Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1964,6 +1964,12 @@
   std::tie(IdxBaseReg, Offset, Unused)
     = AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
 
+  if (IdxBaseReg == AMDGPU::NoRegister) {
+    // This will happen if the index is a known constant. This should ordinarily
+    // be legalized out, but handle it as a register just in case.
+    assert(Offset == 0 && "no base register implies zero offset");
+    IdxBaseReg = IdxReg;
+  }
 
   ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);
 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
@@ -784,3 +784,58 @@
     %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
     S_ENDPGM 0, implicit %4
 ...
+
+---
+name: extract_vector_elt_s_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+# Index is a G_CONSTANT; both prefixes expect an M0-based S_MOVRELS_B32.
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx
+    ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx
+    ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GPRIDX: $m0 = COPY [[S_MOV_B32_]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s32) = G_CONSTANT i32 0
+    %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_v_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+# VGPR vector, G_CONSTANT index: V_MOVRELS_B32 (MOVREL) or GPR-index mode (GPRIDX).
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:sgpr(s32) = G_CONSTANT i32 0
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
@@ -624,3 +624,66 @@
     %5:sgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4
     S_ENDPGM 0, implicit %5
 ...
+
+# This should have been folded out in the legalizer, but make sure it
+# doesn't crash.
+---
+name: insert_vector_elt_s_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+# SGPR vector, G_CONSTANT index: both prefixes expect S_INDIRECT_REG_WRITE_B32_V4.
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+    ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx
+    ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+    ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0
+    ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]]
+    ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx
+    ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GPRIDX: $m0 = COPY [[S_MOV_B32_]]
+    ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]]
+    %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s32) = COPY $sgpr4
+    %2:sgpr(s32) = G_CONSTANT i32 0
+    %3:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: insert_vector_elt_v_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+# VGPR vector, G_CONSTANT index: GPRIDX wraps the write in S_SET_GPR_IDX_ON/OFF.
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+    ; MOVREL-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+    ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
+    ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]]
+    ; GPRIDX-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 8, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]]
+    %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s32) = COPY $sgpr4
+    %2:sgpr(s32) = G_CONSTANT i32 0
+    %3:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...