Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1773,19 +1773,19 @@ } case TargetOpcode::G_EXTRACT_VECTOR_ELT: { if (TypeIdx == 0) { - Register VecReg = MI.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); Observer.changingInstr(MI); - - widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), - WideTy.getSizeInBits()), - 1, TargetOpcode::G_SEXT); - widenScalarDst(MI, WideTy, 0); Observer.changedInstr(MI); return Legalized; } + if (TypeIdx == 1) { + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + return Legalized; + } + if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); @@ -1795,16 +1795,17 @@ return Legalized; } case TargetOpcode::G_INSERT_VECTOR_ELT: { - if (TypeIdx == 1) { + if (TypeIdx == 0) { Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } - Register VecReg = MI.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); - LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy); - - widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT); + if (TypeIdx == 1) { + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); - widenScalarDst(MI, WideVecTy, 0); Observer.changedInstr(MI); return Legalized; } Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -919,14 +919,15 @@ const LLT EltTy = Query.Types[EltTypeIdx]; const LLT VecTy = Query.Types[VecTypeIdx]; const LLT IdxTy = Query.Types[IdxTypeIdx]; - return (EltTy.getSizeInBits() == 16 || + return EltTy.getSizeInBits() == VecTy.getScalarSizeInBits() && + (EltTy.getSizeInBits() == 16 || EltTy.getSizeInBits() % 32 == 0) && VecTy.getSizeInBits() % 32 == 0 && VecTy.getSizeInBits() <= 1024 && IdxTy.getSizeInBits() == 32; }) .clampScalar(EltTypeIdx, S32, S64) - .clampScalar(VecTypeIdx, S32, S64) + .clampScalarOrElt(VecTypeIdx, S32, S64) .clampScalar(IdxTypeIdx, S32, S32); } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -150,9 +150,9 @@ ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY1]], [[COPY2]](s32), 0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY [[INSERT]](<2 x s32>) ; CHECK: $vgpr0_vgpr1 = COPY [[COPY3]](<2 x s32>) %0:_(s32) = COPY $vgpr0