Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -308,6 +308,11 @@ /// Keep the same scalar or element type as the given type. LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty); +/// Change the scalar size or element size to have the same scalar size as type +/// index \p FromTypeIdx. Unlike changeElementTo, this discards pointer types +/// and only changes the size. +LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx); + /// Widen the scalar type or vector element type for the given type index to the /// next power of 2. LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0); @@ -872,6 +877,24 @@ changeElementTo(typeIdx(TypeIdx), Ty)); } + /// Conditionally limit the minimum size of the scalar. + /// For example, when the minimum size of one type depends on the size of + /// another such as extracting N bits from an M bit container. + LegalizeRuleSet &minScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, + const LLT Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf( + LegalizeAction::WidenScalar, + [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isScalar() && + QueryTy.getSizeInBits() < Ty.getSizeInBits() && + Predicate(Query); + }, + changeElementTo(typeIdx(TypeIdx), Ty)); + } + /// Limit the range of scalar sizes to MinTy and MaxTy. LegalizeRuleSet &clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy) { @@ -893,12 +916,25 @@ return Query.Types[LargeTypeIdx].getScalarSizeInBits() > Query.Types[TypeIdx].getSizeInBits(); }, + LegalizeMutations::changeElementSizeTo(TypeIdx, LargeTypeIdx)); + } + + /// Narrow the scalar to match the size of another. 
+ LegalizeRuleSet &maxScalarSameAs(unsigned TypeIdx, unsigned NarrowTypeIdx) { + typeIdx(TypeIdx); + return narrowScalarIf( [=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[TypeIdx]; - const LLT LargeTy = Query.Types[LargeTypeIdx]; - LLT NewEltTy = LLT::scalar(LargeTy.getScalarSizeInBits()); - return std::make_pair(TypeIdx, Ty.changeElementType(NewEltTy)); - }); + return Query.Types[NarrowTypeIdx].getScalarSizeInBits() < + Query.Types[TypeIdx].getSizeInBits(); + }, + LegalizeMutations::changeElementSizeTo(TypeIdx, NarrowTypeIdx)); + } + + /// Change the type \p TypeIdx to have the same scalar size as type \p + /// SameSizeIdx. + LegalizeRuleSet &scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx) { + return minScalarSameAs(TypeIdx, SameSizeIdx) + .maxScalarSameAs(TypeIdx, SameSizeIdx); } /// Conditionally widen the scalar or elt to match the size of another. Index: llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -43,6 +43,16 @@ }; } +LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, + unsigned FromTypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + const LLT NewTy = Query.Types[FromTypeIdx]; + const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits()); + return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy)); + }; +} + LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min) { return [=](const LegalityQuery &Query) { Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -692,10 +692,8 @@ .scalarize(0); getActionDefinitionsBuilder(G_PTRMASK) - .legalIf(typeInSet(1, {S64, S32})) - 
.minScalar(1, S32) - .maxScalarIf(sizeIs(0, 32), 1, S32) - .maxScalarIf(sizeIs(0, 64), 1, S64) + .legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32}))) + .scalarSameSizeAs(1, 0) .scalarize(0); auto &CmpBuilder = Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -337,31 +337,6 @@ ... ---- -name: ptrmask_p0_s32_sgpr_sgpr_sgpr -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - - ; CHECK-LABEL: name: ptrmask_p0_s32_sgpr_sgpr_sgpr - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY1]], implicit-def $scc - ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY1]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:sgpr(p0) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 - -... - --- name: ptrmask_p0_s64_sgpr_sgpr_clearhi1 legalized: true @@ -756,55 +731,6 @@ ... 
---- -name: ptrmask_p0_s32_vgpr_vgpr_vgpr -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 - - ; CHECK-LABEL: name: ptrmask_p0_s32_vgpr_vgpr_vgpr - ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY1]], implicit $exec - ; CHECK: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY1]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 - -... - ---- -name: ptrmask_p0_s32_vgpr_vgpr_vgpr_0xffffffff -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 - - ; CHECK-LABEL: name: ptrmask_p0_s32_vgpr_vgpr_vgpr_0xffffffff - ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] - %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_CONSTANT i32 -1 - %2:vgpr(p0) = G_PTRMASK %0, %1 - S_ENDPGM 0, implicit %2 - -... 
- --- name: ptrmask_p0_s64_vgpr_vgpr_clearlo1 legalized: true Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir @@ -10,10 +10,10 @@ ; CHECK-LABEL: name: ptrmask_p1_s16 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -31,7 +31,8 @@ ; CHECK-LABEL: name: ptrmask_p1_s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[ZEXT]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -83,10 +84,10 @@ ; CHECK-LABEL: name: ptrmask_p0_s16 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -104,7 +105,8 @@ ; CHECK-LABEL: name: ptrmask_p0_s32 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[ZEXT]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -194,7 +196,8 @@ ; CHECK-LABEL: name: ptrmask_p3_s64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s32) ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) %0:_(p3) = COPY $vgpr0 %1:_(s64) = COPY $vgpr1_vgpr2