diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -79,9 +79,13 @@
   (apply [{ applyEXT(*${root}, ${matchinfo}); }])
 >;
 
-// Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
-// instruction.
-def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
+def shuf_to_ins_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
+def shuf_to_ins: GICombineRule <
+  (defs root:$root, shuf_to_ins_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchINS(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ return applyINS(*${root}, B, ${matchinfo}); }])
+>;
 
 def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
 def vashr_vlshr_imm : GICombineRule<
@@ -100,6 +104,10 @@
   (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+                                              form_duplane,
+                                              shuf_to_ins]>;
+
 def adjust_icmp_imm_matchdata : GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
 def adjust_icmp_imm : GICombineRule <
@@ -132,8 +140,8 @@
 // pseudos.
 def AArch64PostLegalizerLoweringHelper
     : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
-                       [shuffle_vector_pseudos, vashr_vlshr_imm,
-                        icmp_lowering, form_duplane]> {
+                       [shuffle_vector_lowering, vashr_vlshr_imm,
+                        icmp_lowering]> {
   let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
 }
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -176,6 +176,37 @@
   return true;
 }
 
+/// Helper function for matchINS.
+///
+/// \returns a value when \p M is an ins mask for \p NumInputElements.
+///
+/// First element of the returned pair is true when the produced
+/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
+///
+/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
+static Optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
+                                                int NumInputElements) {
+  if (M.size() != static_cast<size_t>(NumInputElements))
+    return None;
+  int NumLHSMatch = 0, NumRHSMatch = 0;
+  int LastLHSMismatch = -1, LastRHSMismatch = -1;
+  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
+    if (M[Idx] == -1) {
+      ++NumLHSMatch;
+      ++NumRHSMatch;
+      continue;
+    }
+    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
+    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
+  }
+  const int NumNeededToMatch = NumInputElements - 1;
+  if (NumLHSMatch == NumNeededToMatch)
+    return std::make_pair(true, LastLHSMismatch);
+  if (NumRHSMatch == NumNeededToMatch)
+    return std::make_pair(false, LastRHSMismatch);
+  return None;
+}
+
 /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
 /// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
 static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -378,6 +409,59 @@
   return true;
 }
 
+/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
+/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
+///
+/// e.g.
+///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
+///
+/// Can be represented as
+///
+///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
+///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
+static bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  Register Dst = MI.getOperand(0).getReg();
+  int NumElts = MRI.getType(Dst).getNumElements();
+  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
+  if (!DstIsLeftAndDstLane)
+    return false;
+  bool DstIsLeft;
+  int DstLane;
+  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
+  Register Left = MI.getOperand(1).getReg();
+  Register Right = MI.getOperand(2).getReg();
+  Register DstVec = DstIsLeft ? Left : Right;
+  Register SrcVec = Left;
+
+  int SrcLane = ShuffleMask[DstLane];
+  if (SrcLane >= NumElts) {
+    SrcVec = Right;
+    SrcLane -= NumElts;
+  }
+
+  auto ScalarTy = MRI.getType(Dst).getElementType();
+  MatchInfo = [=](MachineIRBuilder &MIB) {
+    MIB.buildInsertVectorElement(
+        Dst, DstVec,
+        MIB.buildExtractVectorElement(
+            ScalarTy, SrcVec, MIB.buildConstant(LLT::scalar(64), SrcLane)),
+        MIB.buildConstant(LLT::scalar(64), DstLane));
+  };
+  return true;
+}
+
+static bool applyINS(MachineInstr &MI, MachineIRBuilder &Builder,
+                     std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  Builder.setInstrAndDebugLoc(MI);
+  MatchInfo(Builder);
+  MI.eraseFromParent();
+  return true;
+}
+
 /// isVShiftRImm - Check if this is a valid vector for the immediate
 /// operand of a vector shift right operation. The value must be in the range:
 ///   1 <= Value <= ElementBits for a right shift.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuf-to-ins.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuf-to-ins.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuf-to-ins.mir
@@ -0,0 +1,310 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-lowering --aarch64postlegalizerloweringhelper-only-enable-rule="shuf_to_ins" -verify-machineinstrs %s -o - | FileCheck %s
+
+# REQUIRES: asserts
+
+# Check that we can recognize an ins mask for a shuffle vector.
+
+...
+---
+name: v2s32_match_left_0
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 1, because M[1] != 1.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[DstLane] = 0
+
+    ; CHECK-LABEL: name: v2s32_match_left_0
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(0, 0)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: v2s32_match_left_1
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[1] = 1 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 1
+
+    ; CHECK-LABEL: name: v2s32_match_left_1
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(1, 1)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: v2s32_match_left_3
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 1, because M[1] != 1.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[1] = 3 - 2 = 1
+
+    ; CHECK-LABEL: name: v2s32_match_left_3
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(0, 3)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+
+...
+---
+name: v2s32_match_right_3
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[1] = 1 + 2 -> G_INSERT_VECTOR_ELT should use %right.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 2.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 1
+
+    ; CHECK-LABEL: name: v2s32_match_right_3
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %right, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(1, 3)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: v2s32_match_right_2
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 + 2 -> G_INSERT_VECTOR_ELT should use %right.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 1, because M[1] != 3.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[1] = 0
+
+    ; CHECK-LABEL: name: v2s32_match_right_2
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %right, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 0)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: dont_combine_too_many_matches_right
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 + 2, M[1] = 1 + 2 -> too many matches.
+
+    ; CHECK-LABEL: name: dont_combine_too_many_matches_right
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 3)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 3)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: tiebreaker
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; Matched the correct amount on the left and right.
+    ; Use left as a tiebreaker.
+    ;
+    ; Matched M[1] = 1 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 2 - 2 = 0
+
+    ; CHECK-LABEL: name: tiebreaker
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 1)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: tiebreaker_undef
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; Undef counts as a match for left and right.
+    ;
+    ; Matched M[1] = -1 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0.
+ ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 2 - 2 = 0 + + ; CHECK-LABEL: name: tiebreaker_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: %left:_(<2 x s32>) = COPY $d0 + ; CHECK: %right:_(<2 x s32>) = COPY $d1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64) + ; CHECK: $d0 = COPY %shuf(<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %left:_(<2 x s32>) = COPY $d0 + %right:_(<2 x s32>) = COPY $d1 + %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, -1) + $d0 = COPY %shuf(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: match_left_undef +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + + ; Undef counts as a match for left and right. + ; + ; Matched M[1] = -1 -> G_INSERT_VECTOR_ELT should use %left. + ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0. + ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 3 - 2 = 1 + + ; CHECK-LABEL: name: match_left_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: %left:_(<2 x s32>) = COPY $d0 + ; CHECK: %right:_(<2 x s32>) = COPY $d1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64) + ; CHECK: $d0 = COPY %shuf(<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %left:_(<2 x s32>) = COPY $d0 + %right:_(<2 x s32>) = COPY $d1 + %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(3, -1) + $d0 = COPY %shuf(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: match_right_undef +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + + ; Matched M[0] = 0 + 4, undef, undef => 3 matches on the right. + ; + ; DstLane (G_INSERT_VECTOR_ELT) : 3, because M[3] != 7. + ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[3] = 2 + + ; CHECK-LABEL: name: match_right_undef + ; CHECK: liveins: $q0, $q1 + ; CHECK: %left:_(<4 x s32>) = COPY $q0 + ; CHECK: %right:_(<4 x s32>) = COPY $q1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<4 x s32>), [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK: %shuf:_(<4 x s32>) = G_INSERT_VECTOR_ELT %right, [[EVEC]](s32), [[C1]](s64) + ; CHECK: $q0 = COPY %shuf(<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %left:_(<4 x s32>) = COPY $q0 + %right:_(<4 x s32>) = COPY $q1 + %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %left(<4 x s32>), %right, shufflemask(4, -1, -1, 2) + $q0 = COPY %shuf(<4 x s32>) + RET_ReallyLR implicit $q0
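Reviewer note, not part of the patch: the lane arithmetic spelled out in the test comments can be replayed outside of LLVM. The sketch below re-implements the isINSMask logic with plain STL types (std::optional in place of llvm::Optional) and applies matchINS's source-lane computation to the shuffle masks above; the Case table, the names in it, and the main() harness are illustrative additions, assuming only a C++17 compiler.

// Standalone sanity check for the ins-mask recognition above (illustrative,
// not code from the tree). Build with: c++ -std=c++17 ins_mask_check.cpp
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

// Mirrors isINSMask: yields (DstIsLeft, DstLane) when all lanes but one copy
// one input vector through unchanged (undef lanes match either side).
static std::optional<std::pair<bool, int>> isINSMask(const std::vector<int> &M,
                                                     int NumInputElements) {
  if (M.size() != static_cast<std::size_t>(NumInputElements))
    return std::nullopt;
  int NumLHSMatch = 0, NumRHSMatch = 0;
  int LastLHSMismatch = -1, LastRHSMismatch = -1;
  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
    if (M[Idx] == -1) { // Undef counts as a match on both sides.
      ++NumLHSMatch;
      ++NumRHSMatch;
      continue;
    }
    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
  }
  const int NumNeededToMatch = NumInputElements - 1;
  if (NumLHSMatch == NumNeededToMatch) // Checked first: left is the tiebreaker.
    return std::make_pair(true, LastLHSMismatch);
  if (NumRHSMatch == NumNeededToMatch)
    return std::make_pair(false, LastRHSMismatch);
  return std::nullopt;
}

int main() {
  // Masks and expected (DstIsLeft, DstLane, SrcIsLeft, SrcLane), transcribed
  // from the MIR tests above.
  struct Case {
    const char *Name;
    std::vector<int> M;
    bool DstIsLeft;
    int DstLane;
    bool SrcIsLeft;
    int SrcLane;
  };
  const Case Cases[] = {
      {"v2s32_match_left_0", {0, 0}, true, 1, true, 0},
      {"v2s32_match_left_1", {1, 1}, true, 0, true, 1},
      {"v2s32_match_left_3", {0, 3}, true, 1, false, 1},
      {"v2s32_match_right_3", {1, 3}, false, 0, true, 1},
      {"v2s32_match_right_2", {2, 0}, false, 1, true, 0},
      {"tiebreaker", {2, 1}, true, 0, false, 0},
      {"tiebreaker_undef", {2, -1}, true, 0, false, 0},
      {"match_left_undef", {3, -1}, true, 0, false, 1},
      {"match_right_undef", {4, -1, -1, 2}, false, 3, true, 2},
  };
  for (const Case &C : Cases) {
    const int NumElts = static_cast<int>(C.M.size());
    auto Res = isINSMask(C.M, NumElts);
    assert(Res && "expected an ins mask");
    // Same source-lane arithmetic as matchINS: mask indices >= NumElts
    // select a lane of the right-hand input.
    int SrcLane = C.M[Res->second];
    const bool SrcIsLeft = SrcLane < NumElts;
    if (!SrcIsLeft)
      SrcLane -= NumElts;
    assert(Res->first == C.DstIsLeft && Res->second == C.DstLane);
    assert(SrcIsLeft == C.SrcIsLeft && SrcLane == C.SrcLane);
    std::printf("%-24s dst=%s[%d] src=%s[%d]\n", C.Name,
                Res->first ? "left" : "right", Res->second,
                SrcIsLeft ? "left" : "right", SrcLane);
  }
  // Two matches on the right when only one lane may differ: no combine.
  assert(!isINSMask({2, 3}, 2)); // dont_combine_too_many_matches_right
  return 0;
}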