Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -82,6 +82,30 @@
   };
 }
 
+// Returns a predicate that accepts the type at index TypeIdx when its total
+// size is a multiple of 32 bits and at most 512 bits. A vector must
+// additionally have 32 or 64-bit elements, or an even number of 16-bit
+// elements (multiples of v2s16).
+static LegalityPredicate isRegisterType(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+
+    // The size limit applies to vectors as well as scalars; without it an
+    // oversized vector such as <32 x s32> would be reported as legal.
+    const unsigned Size = Ty.getSizeInBits();
+    if (Size % 32 != 0 || Size > 512)
+      return false;
+
+    if (Ty.isVector()) {
+      const int EltSize = Ty.getElementType().getSizeInBits();
+      return EltSize == 32 || EltSize == 64 ||
+             (EltSize == 16 && Ty.getNumElements() % 2 == 0);
+    }
+
+    return true;
+  };
+}
+
 AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
                                          const GCNTargetMachine &TM)
   : ST(ST_) {
@@ -103,7 +127,6 @@
 
   const LLT V2S16 = LLT::vector(2, 16);
   const LLT V4S16 = LLT::vector(4, 16);
-  const LLT V8S16 = LLT::vector(8, 16);
 
   const LLT V2S32 = LLT::vector(2, 32);
   const LLT V3S32 = LLT::vector(3, 32);
@@ -797,17 +820,10 @@
           Query.Types[0].getScalarSizeInBits() == 64;
       });
 
-  // TODO: Support any combination of v2s32
+  // NOTE(review): only the result type (index 0) is checked here; confirm
+  // whether the source type (index 1) needs the same constraint.
   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
-    .legalFor({{V4S32, V2S32},
-               {V8S32, V2S32},
-               {V8S32, V4S32},
-               {V4S64, V2S64},
-               {V4S16, V2S16},
-               {V8S16, V2S16},
-               {V8S16, V4S16},
-               {LLT::vector(4, LocalPtr), LLT::vector(2, LocalPtr)},
-               {LLT::vector(4, PrivatePtr), LLT::vector(2, PrivatePtr)}});
+    .legalIf(isRegisterType(0));
 
   // Merge/Unmerge
   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
@@ -127,3 +127,88 @@
     %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
 ...
+
+---
+name: concat_vectors_v2p1_v2p1
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-LABEL: name: concat_vectors_v2p1_v2p1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p1>) = G_CONCAT_VECTORS [[COPY]](<2 x p1>), [[COPY1]](<2 x p1>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>)
+    %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<4 x p1>) = G_CONCAT_VECTORS %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
+...
+
+---
+name: concat_vectors_v2p0_v2p0
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-LABEL: name: concat_vectors_v2p0_v2p0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p0>) = G_CONCAT_VECTORS [[COPY]](<2 x p0>), [[COPY1]](<2 x p0>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p0>)
+    %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<4 x p0>) = G_CONCAT_VECTORS %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
+...
+
+---
+name: concat_vectors_v2p3_v2p3
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: concat_vectors_v2p3_v2p3
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p3>) = G_CONCAT_VECTORS [[COPY]](<2 x p3>), [[COPY1]](<2 x p3>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p3>)
+    %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x p3>) = COPY $vgpr2_vgpr3
+    %2:_(<4 x p3>) = G_CONCAT_VECTORS %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...
+
+---
+name: concat_vectors_v2p5_v2p5
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-LABEL: name: concat_vectors_v2p5_v2p5
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr2_vgpr3
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p5>) = G_CONCAT_VECTORS [[COPY]](<2 x p5>), [[COPY1]](<2 x p5>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p5>)
+    %0:_(<2 x p5>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x p5>) = COPY $vgpr2_vgpr3
+    %2:_(<4 x p5>) = G_CONCAT_VECTORS %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...
+
+---
+name: concat_vectors_v2p999_v2p999
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK-LABEL: name: concat_vectors_v2p999_v2p999
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p999>) = G_CONCAT_VECTORS [[COPY]](<2 x p999>), [[COPY1]](<2 x p999>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p999>)
+    %0:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<4 x p999>) = G_CONCAT_VECTORS %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
+...