Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2363,6 +2363,18 @@
     moreElementsVectorSrc(MI, MoreTy, 1);
     Observer.changedInstr(MI);
     return Legalized;
+  case TargetOpcode::G_SELECT:
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+      return UnableToLegalize;
+
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 2);
+    moreElementsVectorSrc(MI, MoreTy, 3);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
   default:
     return UnableToLegalize;
   }
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -71,6 +71,12 @@
 };
 }
 
+static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
+  };
+}
 
 AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                          const GCNTargetMachine &TM) {
@@ -458,28 +464,13 @@
        GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
        LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
     .clampScalar(0, S32, S64)
-    .fewerElementsIf(
-      [=](const LegalityQuery &Query) {
-        if (Query.Types[1].isVector())
-          return true;
-
-        LLT Ty = Query.Types[0];
-
-        // FIXME: Hack until odd splits handled
-        return Ty.isVector() &&
-          (Ty.getScalarSizeInBits() > 32 || Ty.getNumElements() % 2 != 0);
-      },
-      scalarize(0))
-    // FIXME: Handle 16-bit vectors better
-    .fewerElementsIf(
-      [=](const LegalityQuery &Query) {
-        return Query.Types[0].isVector() &&
-               Query.Types[0].getElementType().getSizeInBits() < 32;},
-      scalarize(0))
+    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+    .fewerElementsIf(numElementsNotEven(0), scalarize(0))
     .scalarize(1)
     .clampMaxNumElements(0, S32, 2)
     .clampMaxNumElements(0, LocalPtr, 2)
     .clampMaxNumElements(0, PrivatePtr, 2)
+    .scalarize(0)
     .legalIf(all(isPointer(0), typeIs(1, S1)));
 
   // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
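Note: the new LegalizerHelper case only handles TypeIdx 0 and bails out with UnableToLegalize when the condition (operand 1) is a vector, since a vector condition would need its own padding. On the AMDGPU side, the rewritten rule chain first pads small odd vectors out by one element and only scalarizes what is still odd afterwards. isSmallOddVector(0) and oneMoreElement(0) are pre-existing helpers in AMDGPULegalizerInfo.cpp that this patch merely references; the sketch below shows the shape such helpers take (assumed for illustration, not part of this diff, and the in-tree definitions may differ):

// Sketch only: plausible definitions for the two helpers referenced above.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    // Odd element count with sub-dword elements, e.g. <3 x s16> or <3 x s8>.
    return Ty.isVector() && Ty.getNumElements() % 2 != 0 &&
           Ty.getElementType().getSizeInBits() < 32;
  };
}

static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    // Pad e.g. <3 x s16> out to <4 x s16> so the type packs evenly.
    return std::make_pair(
        TypeIdx, LLT::vector(Ty.getNumElements() + 1, Ty.getElementType()));
  };
}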
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -248,23 +248,32 @@
     ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
     ; CHECK: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>)
     ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
-    ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<3 x s8>)
-    ; CHECK: [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<3 x s8>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[TRUNC1]](<3 x s8>), 0
+    ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
     ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
     ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
     ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s32)
     ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
-    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
     ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]]
     ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s32)
     ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
-    ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
     ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]]
     ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8)
-    ; CHECK: [[ANYEXT6:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<3 x s8>)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT6]](<3 x s32>)
+    ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+    ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
+    ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]]
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0
+    ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = G_CONSTANT i32 0
     %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3
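The G_IMPLICIT_DEF/G_INSERT pairs in the updated checks are the moreElementsVector padding at work: each <3 x s8> source is inserted at offset 0 into an undef <4 x s8>, all four lanes go through the per-element select, and the undef fourth lane is dropped again by the final G_EXTRACT. A minimal sketch of the source-padding step, under a hypothetical helper name (the real logic is LegalizerHelper::moreElementsVectorSrc and may differ in detail):

// Hypothetical sketch: pad source operand OpIdx of MI out to MoreTy by
// inserting the current value into an undef vector of the wider type,
// mirroring the G_IMPLICIT_DEF + G_INSERT pattern checked above.
static void padSrcToMoreElements(MachineInstr &MI, LLT MoreTy, unsigned OpIdx,
                                 MachineIRBuilder &B) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto Undef = B.buildUndef(MoreTy);                    // G_IMPLICIT_DEF
  auto Widened = B.buildInsert(MoreTy, Undef.getReg(0), // G_INSERT ..., 0
                               MO.getReg(), 0);
  MO.setReg(Widened.getReg(0));
}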
@@ -365,24 +374,15 @@
     ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
     ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
     ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY2]](<4 x s16>), 0
-    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
-    ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT]](s32)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
-    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
-    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32)
-    ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
-    ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
-    ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
     ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CHECK: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]]
+    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[SELECT]](<4 x s16>), 0
+    ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0
+    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(<4 x s16>) = COPY $vgpr1_vgpr2
     %2:_(<4 x s16>) = COPY $vgpr3_vgpr4
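The result side of the <3 x s16> test shows the payoff: instead of scalarizing into three s32 selects, the operands are padded to <4 x s16>, a single G_SELECT runs in the wider type, and the original three lanes come back via G_EXTRACT, the SELECT -> EXTRACT2 sequence in the checks. The widened G_SELECT survives legalization here, so <4 x s16> select is evidently legal in this configuration. A hypothetical sketch of that destination handling (the real code is LegalizerHelper::moreElementsVectorDst; the name and details here are illustrative):

// Hypothetical sketch: give MI a wider destination of type MoreTy, then
// re-extract the original narrow vector right after the instruction.
static void extractDstFromMoreElements(MachineInstr &MI, LLT MoreTy,
                                       unsigned OpIdx, MachineIRBuilder &B,
                                       MachineRegisterInfo &MRI) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register WideDst = MRI.createGenericVirtualRegister(MoreTy);
  B.setInsertPt(B.getMBB(), std::next(MI.getIterator()));
  B.buildExtract(MO.getReg(), WideDst, 0);              // G_EXTRACT ..., 0
  MO.setReg(WideDst);
}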