diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -293,6 +293,8 @@ LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy); + LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy); LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4871,11 +4871,56 @@ } case TargetOpcode::G_PHI: return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); default: return UnableToLegalize; } } +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI, + unsigned int TypeIdx, LLT MoreTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + LLT DstTy = MRI.getType(DstReg); + LLT Src1Ty = MRI.getType(Src1Reg); + LLT Src2Ty = MRI.getType(Src2Reg); + unsigned NumElts = DstTy.getNumElements(); + unsigned WidenNumElts = MoreTy.getNumElements(); + + // Expect a canonicalized shuffle. + if (DstTy != Src1Ty || DstTy != Src2Ty) + return UnableToLegalize; + + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + + // Adjust mask based on new input vector length. 
+ SmallVector<int, 16> NewMask; + for (unsigned I = 0; I != NumElts; ++I) { + int Idx = Mask[I]; + if (Idx < static_cast<int>(NumElts)) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx - NumElts + WidenNumElts); + } + for (unsigned I = NumElts; I != WidenNumElts; ++I) + NewMask.push_back(-1); + moreElementsVectorDst(MI, MoreTy, 0); + MIRBuilder.setInstrAndDebugLoc(MI); + MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(), + MI.getOperand(1).getReg(), + MI.getOperand(2).getReg(), NewMask); + MI.eraseFromParent(); + return Legalized; +} + void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, ArrayRef<Register> Src1Regs, ArrayRef<Register> Src2Regs,
+--- +name: oversize_shuffle_v6i64 +alignment: 4 +tracksRegLiveness: true +frameInfo: + maxAlignment: 8 +fixedStack: + - { id: 0, offset: 24, size: 8, alignment: 8, isImmutable: true } + - { id: 1, offset: 16, size: 8, alignment: 16, isImmutable: true } + - { id: 2, offset: 8, size: 8, alignment: 8, isImmutable: true } + - { id: 3, size: 8, alignment: 16, isImmutable: true } +body: | + bb.1: + liveins: $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + + ; CHECK-LABEL: name: oversize_shuffle_v6i64 + ; CHECK: liveins: $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.1) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: 
[[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s64) from %fixed-stack.3) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) + ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD2]](s64), [[LOAD3]](s64) + ; CHECK: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C1]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C3]](s64) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C1]](s64) + ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C3]](s64) + ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) + ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) + ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C5]](s64) + ; CHECK: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C6]](s64) + ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) + ; CHECK: RET_ReallyLR + %3:_(s64) = 
COPY $d0 + %4:_(s64) = COPY $d1 + %5:_(s64) = COPY $d2 + %6:_(s64) = COPY $d3 + %7:_(s64) = COPY $d4 + %8:_(s64) = COPY $d5 + %0:_(<6 x s64>) = G_BUILD_VECTOR %3(s64), %4(s64), %5(s64), %6(s64), %7(s64), %8(s64) + %9:_(s64) = COPY $d6 + %10:_(s64) = COPY $d7 + %15:_(p0) = G_FRAME_INDEX %fixed-stack.3 + %11:_(s64) = G_LOAD %15(p0) :: (invariant load 8 from %fixed-stack.3, align 16) + %16:_(p0) = G_FRAME_INDEX %fixed-stack.2 + %12:_(s64) = G_LOAD %16(p0) :: (invariant load 8 from %fixed-stack.2) + %17:_(p0) = G_FRAME_INDEX %fixed-stack.1 + %13:_(s64) = G_LOAD %17(p0) :: (invariant load 8 from %fixed-stack.1, align 16) + %18:_(p0) = G_FRAME_INDEX %fixed-stack.0 + %14:_(s64) = G_LOAD %18(p0) :: (invariant load 8 from %fixed-stack.0) + %1:_(<6 x s64>) = G_BUILD_VECTOR %9(s64), %10(s64), %11(s64), %12(s64), %13(s64), %14(s64) + %2:_(p0) = COPY $x0 + %19:_(<6 x s64>) = G_SHUFFLE_VECTOR %0(<6 x s64>), %1, shufflemask(3, 4, 7, 0, 1, 11) + G_STORE %19(<6 x s64>), %2(p0) :: (store 48, align 64) + RET_ReallyLR + +... diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -4007,4 +4007,51 @@ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } +// Test moreElements of G_SHUFFLE_VECTOR. 
+TEST_F(AArch64GISelMITest, moreElementsShuffle) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + LLT S64{LLT::scalar(64)}; + LLT V6S64 = LLT::fixed_vector(6, S64); + + auto V1 = B.buildBuildVector(V6S64, {Copies[0], Copies[1], Copies[0], + Copies[1], Copies[0], Copies[1]}); + auto V2 = B.buildBuildVector(V6S64, {Copies[0], Copies[1], Copies[0], + Copies[1], Copies[0], Copies[1]}); + auto Shuffle = B.buildShuffleVector(V6S64, V1, V2, {3, 4, 7, 0, 1, 11}); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + B.setInsertPt(*EntryMBB, Shuffle->getIterator()); + + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.moreElementsVector(*Shuffle, 0, LLT::fixed_vector(8, S64))); + + const auto *CheckStr = R"( + CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY + CHECK: [[BV1:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR + CHECK: [[BV2:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR + CHECK: [[IMPDEF1:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF + CHECK: [[INSERT1:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF1]]:_, [[BV1]]:_(<6 x s64>), 0 + CHECK: [[IMPDEF2:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF + CHECK: [[INSERT2:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF2]]:_, [[BV2]]:_(<6 x s64>), 0 + CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[INSERT1]]:_(<8 x s64>), [[INSERT2]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef) + CHECK: [[IMPDEF3:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF + CHECK: [[CONCAT:%[0-9]+]]:_(<24 x s64>) = G_CONCAT_VECTORS [[SHUF]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>) + CHECK: [[UNMERGE:%[0-9]+]]:_(<6 x s64>), [[UNMERGE2:%[0-9]+]]:_(<6 x s64>), [[UNMERGE3:%[0-9]+]]:_(<6 x s64>), [[UNMERGE4:%[0-9]+]]:_(<6 x s64>) = G_UNMERGE_VALUES [[CONCAT]]:_(<24 x s64>) + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + } // namespace