Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -29,11 +29,24 @@
   (defs root:$root, zip_matchdata:$matchinfo),
   (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
          [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
-  (apply [{ applyZip(*${root}, ${matchinfo}); }])
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
+def uzp_matchdata : GIDefMatchData<"unsigned">;
+def uzp : GICombineRule<
+  (defs root:$root, uzp_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+// Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
+// instruction.
+def shuffle_vector_pseudos : GICombineGroup<[zip, uzp]>;
+
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
-                       [erase_undef_store, combines_for_extload, zip]> {
+                       [erase_undef_store, combines_for_extload,
+                        shuffle_vector_pseudos]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
Index: llvm/lib/Target/AArch64/AArch64InstrGISel.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -25,6 +25,20 @@
   let hasSideEffects = 0;
 }
 
+// Represents an uzp1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP1 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
+// Represents an uzp2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP2 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
 // Represents a zip1 instruction. Produced post-legalization from
 // G_SHUFFLE_VECTORs with appropriate masks.
 def G_ZIP1 : AArch64GenericInstruction {
@@ -39,5 +53,7 @@
   let InOperandList = (ins type0:$v1, type0:$v2);
 }
 
+def : GINodeEquiv<G_UZP1, AArch64uzp1>;
+def : GINodeEquiv<G_UZP2, AArch64uzp2>;
 def : GINodeEquiv<G_ZIP1, AArch64zip1>;
 def : GINodeEquiv<G_ZIP2, AArch64zip2>;
Index: llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
+++ llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
@@ -28,6 +28,21 @@
 
 using namespace llvm;
 
+/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts
+/// elements. Whether G_UZP1 or G_UZP2 should be used is stored in
+/// \p WhichResult.
+static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
+                      unsigned &WhichResult) {
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    // Skip undef indices.
+    if (M[i] < 0)
+      continue;
+    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
+      return false;
+  }
+  return true;
+}
+
 /// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
 /// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
 static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
@@ -47,6 +62,23 @@
   return true;
 }
 
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_UZP1 or G_UZP2 instruction.
+///
+/// \param [in] MI - The shuffle vector instruction.
+/// \param [out] Opc - Either G_UZP1 or G_UZP2 on success.
+static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     unsigned &Opc) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  unsigned WhichResult;
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
+    return false;
+  Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+  return true;
+}
+
 static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
                      unsigned &Opc) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
@@ -59,7 +91,9 @@
   return true;
 }
 
-static bool applyZip(MachineInstr &MI, unsigned Opc) {
+/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
+/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
+static bool applyShuffleVectorPseudo(MachineInstr &MI, unsigned Opc) {
   MachineIRBuilder MIRBuilder(MI);
   MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
                         {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir
@@ -0,0 +1,146 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can recognize a shuffle mask for a uzp instruction and produce
+# a G_UZP1 or G_UZP2 where appropriate.
+#
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: uzp1_v4s32
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: uzp1_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 2, 4, 6)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: uzp2_v4s32
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: uzp2_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: no_uzp1
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; See isUZPMask: Mask[1] != 2 * i + 0
+
+    ; CHECK-LABEL: name: no_uzp1
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
+    ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4, 6)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: no_uzp2
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; See isUZPMask: Mask[1] != 2 * i + 1
+
+    ; CHECK-LABEL: name: no_uzp2
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
+    ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 4, 5, 7)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: uzp1_undef
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; Make sure that we can still produce a uzp1/uzp2 with undef indices.
+
+    ; CHECK-LABEL: name: uzp1_undef
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, -1, 4, 6)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: uzp2_undef
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; Make sure that we can still produce a uzp1/uzp2 with undef indices.
+
+    ; CHECK-LABEL: name: uzp2_undef
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-uzp.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-uzp.mir
@@ -0,0 +1,53 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can select G_UZP1 and G_UZP2 via the tablegen importer.
+#
+# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+--- +name: uzp1_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: uzp1_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[UZP1v4i32_:%[0-9]+]]:fpr128 = UZP1v4i32 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[UZP1v4i32_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_UZP1 %0, %1 + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: uzp2_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: uzp2_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[UZP2v4i32_:%[0-9]+]]:fpr128 = UZP2v4i32 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[UZP2v4i32_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_UZP2 %0, %1 + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +...
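
For reference (not part of the patch itself): a minimal LLVM IR sketch of the kind of
shuffle this combine targets, assuming a plain aarch64 triple and -global-isel; the
function name is illustrative. The even-element mask (0, 2, 4, 6) is legalized to a
G_SHUFFLE_VECTOR, rewritten to G_UZP1 by the post-legalizer combiner, and then selected
to UZP1v4i32 via the imported pattern, mirroring the MIR tests above.

define <4 x i32> @uzp1_example(<4 x i32> %a, <4 x i32> %b) {
entry:
  ; Takes the even-indexed elements of the concatenation of %a and %b.
  %s = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %s
}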