diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -45,7 +45,6 @@
   const LLT s32 = LLT::scalar(32);
   const LLT s64 = LLT::scalar(64);
   const LLT s128 = LLT::scalar(128);
-  const LLT s256 = LLT::scalar(256);
   const LLT v16s8 = LLT::fixed_vector(16, 8);
   const LLT v8s8 = LLT::fixed_vector(8, 8);
   const LLT v4s8 = LLT::fixed_vector(4, 8);
@@ -534,76 +533,30 @@
   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
-
-    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
-      const LLT &Ty = Query.Types[TypeIdx];
-      if (Ty.isVector()) {
-        const LLT &EltTy = Ty.getElementType();
-        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
-          return true;
-        if (!isPowerOf2_32(EltTy.getSizeInBits()))
-          return true;
-      }
-      return false;
-    };
-
-    // FIXME: This rule is horrible, but specifies the same as what we had
-    // before with the particularly strange definitions removed (e.g.
-    // s8 = G_MERGE_VALUES s32, s32).
-    // Part of the complexity comes from these ops being extremely flexible. For
-    // example, you can build/decompose vectors with it, concatenate vectors,
-    // etc. and in addition to this you can also bitcast with it at the same
-    // time. We've been considering breaking it up into multiple ops to make it
-    // more manageable throughout the backend.
     getActionDefinitionsBuilder(Op)
-        // Break up vectors with weird elements into scalars
-        .fewerElementsIf(
-            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
-            scalarize(0))
-        .fewerElementsIf(
-            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
-            scalarize(1))
-        // Clamp the big scalar to s8-s128 and make it a power of 2.
-        .clampScalar(BigTyIdx, s8, s128)
-        .widenScalarIf(
-            [=](const LegalityQuery &Query) {
-              const LLT &Ty = Query.Types[BigTyIdx];
-              return !isPowerOf2_32(Ty.getSizeInBits()) &&
-                     Ty.getSizeInBits() % 64 != 0 && Ty.isScalar();
-            },
-            [=](const LegalityQuery &Query) {
-              // Pick the next power of 2, or a multiple of 64 over 128.
-              // Whichever is smaller.
-              const LLT &Ty = Query.Types[BigTyIdx];
-              unsigned NewSizeInBits = 1
-                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
-              if (NewSizeInBits >= 256) {
-                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
-                if (RoundedTo < NewSizeInBits)
-                  NewSizeInBits = RoundedTo;
-              }
-              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
-            })
-        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
-        // worth considering the multiples of 64 since 2*192 and 2*384 are not
-        // valid.
-        .clampScalar(LitTyIdx, s8, s256)
-        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
-        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
-        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
-        // At this point it's simple enough to accept the legal types.
-        .legalIf([=](const LegalityQuery &Query) {
-          const LLT &BigTy = Query.Types[BigTyIdx];
-          const LLT &LitTy = Query.Types[LitTyIdx];
-          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+        .widenScalarToNextPow2(LitTyIdx, 8)
+        .widenScalarToNextPow2(BigTyIdx, 32)
+        .clampScalar(LitTyIdx, s8, s64)
+        .clampScalar(BigTyIdx, s32, s128)
+        .legalIf([=](const LegalityQuery &Q) {
+          switch (Q.Types[BigTyIdx].getSizeInBits()) {
+          case 32:
+          case 64:
+          case 128:
+            break;
+          default:
             return false;
-          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+          }
+          switch (Q.Types[LitTyIdx].getSizeInBits()) {
+          case 8:
+          case 16:
+          case 32:
+          case 64:
+            return true;
+          default:
             return false;
-          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
-        })
-        // Any vectors left are the wrong size. Scalarize them.
-        .scalarize(0)
-        .scalarize(1);
+          }
+        });
   }
 
   getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -51,19 +51,6 @@
   br label %block
 }
 
-; Currently can't handle vector lengths that aren't an exact multiple of
-; natively supported vector lengths. Test that the fall-back works for those.
-; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %47:_(<14 x s64>) = G_CONCAT_VECTORS %41:_(<2 x s64>), %42:_(<2 x s64>), %43:_(<2 x s64>), %44:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>) (in function: nonpow2_vector_add_fewerelements)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
-define void @nonpow2_vector_add_fewerelements() {
-  %dummy = add <7 x i64> undef, undef
-  %ex = extractelement <7 x i64> %dummy, i64 0
-  store i64 %ex, i64* undef
-  ret void
-}
-
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: RET_ReallyLR implicit $x0 (in function: strict_align_feature)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for strict_align_feature
 ; FALLBACK-WITH-REPORT-OUT-LABEL: strict_align_feature
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=aarch64 -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -march=aarch64 -O0 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
 
 ---
 name: test_merge_s4
@@ -26,3 +26,26 @@
     %4:_(s64) = G_ANYEXT %3
     $x0 = COPY %4
 ...
+---
+name: test_merge_s16_s8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_merge_s16_s8
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: %b:_(s32) = COPY $w1
+    ; CHECK: %a_t:_(s8) = G_TRUNC %a(s32)
+    ; CHECK: %b_t:_(s8) = G_TRUNC %b(s32)
+    ; CHECK: %m:_(s16) = G_MERGE_VALUES %a_t(s8), %b_t(s8)
+    ; CHECK: %ext:_(s64) = G_ANYEXT %m(s16)
+    ; CHECK: $x0 = COPY %ext(s64)
+
+    ; This isn't legal but we don't support widening the destination type.
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w1
+    %a_t:_(s8) = G_TRUNC %a
+    %b_t:_(s8) = G_TRUNC %b
+
+    %m:_(s16) = G_MERGE_VALUES %a_t, %b_t
+    %ext:_(s64) = G_ANYEXT %m
+    $x0 = COPY %ext
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -262,9 +262,9 @@
     ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C1]](s64)
     ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C3]](s64)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
     ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
     ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
-    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
     ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
     ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C5]](s64)
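
Not part of the patch itself: a minimal MIR sketch (hypothetical test name, same RUN-line and CHECK conventions as legalize-merge-values.mir above) of a case the new legalIf accepts outright. Both the destination size (s64) and the source size (s32) are listed in the switch, so the legalizer is expected to leave the G_MERGE_VALUES untouched.

# RUN: llc -march=aarch64 -O0 -run-pass=legalizer %s -o - | FileCheck %s
---
name: merge_two_s32_into_s64
body: |
  bb.0:
    ; CHECK-LABEL: name: merge_two_s32_into_s64
    ; CHECK: %m:_(s64) = G_MERGE_VALUES %lo(s32), %hi(s32)
    %x:_(s64) = COPY $x0
    %lo:_(s32) = G_TRUNC %x
    %hi:_(s32) = G_TRUNC %x
    %m:_(s64) = G_MERGE_VALUES %lo, %hi
    $x0 = COPY %m
...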