Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4896,6 +4896,7 @@ return Legalized; } case TargetOpcode::G_EXTRACT: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: if (TypeIdx != 1) return UnableToLegalize; Observer.changingInstr(MI); @@ -4903,6 +4904,7 @@ Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_INSERT: + case TargetOpcode::G_INSERT_VECTOR_ELT: case TargetOpcode::G_FREEZE: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1450,6 +1450,22 @@ unsigned IdxTypeIdx = 2; getActionDefinitionsBuilder(Op) + .moreElementsIf( + [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[VecTypeIdx]; + return Ty.getSizeInBits() < 1024 && + !SIRegisterInfo::getSGPRClassForBitWidth(Ty.getSizeInBits()); + }, [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[VecTypeIdx]; + unsigned NumElts = Ty.getNumElements(); + unsigned EltSize = Ty.getElementType().getSizeInBits(); + + // Find the nearest legal RegClass that is larger than the current type. + while (!SIRegisterInfo::getSGPRClassForBitWidth(NumElts * EltSize)) + ++NumElts; + + return std::pair(VecTypeIdx, LLT::fixed_vector(NumElts, EltSize)); + }) .customIf([=](const LegalityQuery &Query) { const LLT EltTy = Query.Types[EltTypeIdx]; const LLT VecTy = Query.Types[VecTypeIdx]; @@ -1534,6 +1550,22 @@ } auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR) + .moreElementsIf( + [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[0]; + return Ty.getSizeInBits() < 1024 && + !SIRegisterInfo::getSGPRClassForBitWidth(Ty.getSizeInBits()); + }, [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[0]; + unsigned NumElts = Ty.getNumElements(); + unsigned EltSize = Ty.getElementType().getSizeInBits(); + + // Find the nearest legal RegClass that is larger than the current type. + while (!SIRegisterInfo::getSGPRClassForBitWidth(NumElts * EltSize)) + ++NumElts; + + return std::pair(0, LLT::fixed_vector(NumElts, EltSize)); + }) .legalForCartesianProduct(AllS32Vectors, {S32}) .legalForCartesianProduct(AllS64Vectors, {S64}) .clampNumElements(0, V16S32, V32S32)