diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -24,8 +24,16 @@
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
 }

+def zip_matchdata : GIDefMatchData<"unsigned">;
+def zip : GICombineRule<
+  (defs root:$root, zip_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyZip(*${root}, ${matchinfo}); }])
+>;
+
 def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
-    [erase_undef_store, combines_for_extload]> {
+    [erase_undef_store, combines_for_extload, zip]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -24,3 +24,20 @@
   let InOperandList = (ins type1:$src, type2:$imm);
   let hasSideEffects = 0;
 }
+
+// Represents a zip1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_ZIP1 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
+// Represents a zip2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_ZIP2 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
+def : GINodeEquiv<G_ZIP1, AArch64zip1>;
+def : GINodeEquiv<G_ZIP2, AArch64zip2>;
diff --git a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
@@ -28,6 +28,45 @@
 using namespace llvm;

+/// \return true if \p M is a zip mask for a shuffle vector with \p NumElts
+/// elements. Whether G_ZIP1 or G_ZIP2 should be used is stored in
+/// \p WhichResult.
+static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
+                      unsigned &WhichResult) {
+  if (NumElts % 2 != 0)
+    return false;
+
+  // 0 means use ZIP1, 1 means use ZIP2.
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
+        (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
+      return false;
+    Idx += 1;
+  }
+  return true;
+}
+
+static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     unsigned &Opc) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  unsigned WhichResult;
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+  if (!isZipMask(ShuffleMask, NumElts, WhichResult))
+    return false;
+  Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+  return true;
+}
+
+static bool applyZip(MachineInstr &MI, unsigned Opc) {
+  MachineIRBuilder MIRBuilder(MI);
+  MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
+                        {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir
@@ -0,0 +1,221 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can recognize a shuffle mask for a zip instruction, and produce
+# G_ZIP1 or G_ZIP2 where appropriate.
+#
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: zip1_v2s32
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: zip1_v2s32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[ZIP1_:%[0-9]+]]:_(<2 x s32>) = G_ZIP1 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ZIP1_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 2)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: zip1_v2s64
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: zip1_v2s64
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: [[ZIP1_:%[0-9]+]]:_(<2 x s64>) = G_ZIP1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[ZIP1_]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(0, 2)
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: zip1_v4s32
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: zip1_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[ZIP1_:%[0-9]+]]:_(<4 x s32>) = G_ZIP1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[ZIP1_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4, 1, 5)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+--- +name: zip2_v2s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: zip2_v2s32 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3) + $d0 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: zip2_v2s64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: zip2_v2s64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s64>) = G_ZIP2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[ZIP2_]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = COPY $q1 + %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(1, 3) + $q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit $q0 + +... +--- +name: zip2_v4s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: zip2_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[ZIP2_:%[0-9]+]]:_(<4 x s32>) = G_ZIP2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[ZIP2_]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(2, 6, 3, 7) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: zip2_no_combine_idx_mismatch +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; This will fail because it expects 3 to be the second element of the + ; shuffle vector mask. + ; + ; CHECK-LABEL: name: zip2_no_combine_idx_mismatch + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[COPY1]], shufflemask(1, 2) + ; CHECK: $q0 = COPY [[SHUF]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = COPY $q1 + %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(1, 2) + $q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit $q0 + +... +--- +name: zip1_no_combine_idx_mismatch +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; This will fail because it expects 2 to be the second element of the + ; shuffle vector mask. + ; + ; CHECK-LABEL: name: zip1_no_combine_idx_mismatch + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[COPY1]], shufflemask(0, 1) + ; CHECK: $q0 = COPY [[SHUF]](<2 x s64>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = COPY $q1 + %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(0, 1) + $q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit $q0 + +... 
+---
+name: no_combine_first_elt_of_mask_must_be_zero_or_one
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; To be a zip mask, the first element must be 0 (zip1) or NumElts / 2
+    ; (zip2). For a 4-element mask, 3 is neither, so no combine happens.
+    ;
+    ; CHECK-LABEL: name: no_combine_first_elt_of_mask_must_be_zero_or_one
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 4, 1, 5)
+    ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(3, 4, 1, 5)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-zip.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-zip.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-zip.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can select G_ZIP1 and G_ZIP2 via the tablegen importer.
+#
+# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: zip1_v2s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: zip1_v2s32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[ZIP1v2i32_:%[0-9]+]]:fpr64 = ZIP1v2i32 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ZIP1v2i32_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = COPY $d1
+    %2:fpr(<2 x s32>) = G_ZIP1 %0, %1
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: zip1_v2s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: zip1_v2s64
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[ZIP1v2i64_:%[0-9]+]]:fpr128 = ZIP1v2i64 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[ZIP1v2i64_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<2 x s64>) = COPY $q0
+    %1:fpr(<2 x s64>) = COPY $q1
+    %2:fpr(<2 x s64>) = G_ZIP1 %0, %1
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: zip1_v4s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: zip1_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[ZIP1v4i32_:%[0-9]+]]:fpr128 = ZIP1v4i32 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[ZIP1v4i32_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = COPY $q1
+    %2:fpr(<4 x s32>) = G_ZIP1 %0, %1
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: zip2_v2s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: zip2_v2s32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[ZIP2v2i32_:%[0-9]+]]:fpr64 = ZIP2v2i32 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ZIP2v2i32_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = COPY $d1
+    %2:fpr(<2 x s32>) = G_ZIP2 %0, %1
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: zip2_v2s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: zip2_v2s64
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[ZIP2v2i64_:%[0-9]+]]:fpr128 = ZIP2v2i64 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[ZIP2v2i64_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<2 x s64>) = COPY $q0
+    %1:fpr(<2 x s64>) = COPY $q1
+    %2:fpr(<2 x s64>) = G_ZIP2 %0, %1
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: zip2_v4s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: zip2_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[ZIP2v4i32_:%[0-9]+]]:fpr128 = ZIP2v4i32 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[ZIP2v4i32_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = COPY $q1
+    %2:fpr(<4 x s32>) = G_ZIP2 %0, %1
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
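
For intuition: over NumElts elements, a zip1 mask interleaves the low halves of the two sources, (0, NumElts, 1, NumElts + 1, ...), while a zip2 mask runs the same walk starting at NumElts / 2; negative (undef) mask elements match anything. The standalone sketch below mirrors the isZipMask logic from this patch so it can be exercised outside of LLVM. The only liberty taken is std::vector<int> standing in for ArrayRef<int>; the test masks are copied from the MIR tests above.

// Standalone sketch of the patch's isZipMask; std::vector<int> replaces
// ArrayRef<int> so this compiles without LLVM headers.
#include <cassert>
#include <vector>

static bool isZipMask(const std::vector<int> &M, unsigned NumElts,
                      unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;
  // 0 means use ZIP1, 1 means use ZIP2.
  WhichResult = (M[0] == 0 ? 0 : 1);
  // zip1 starts its walk at element 0, zip2 at element NumElts / 2.
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    // Even mask positions pull from the first source, odd positions from
    // the second; negative (undef) elements are allowed to match anything.
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
        (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
      return false;
    Idx += 1;
  }
  return true;
}

int main() {
  unsigned Which;
  // Masks from the positive tests zip1_v4s32 and zip2_v4s32.
  assert(isZipMask({0, 4, 1, 5}, 4, Which) && Which == 0);
  assert(isZipMask({2, 6, 3, 7}, 4, Which) && Which == 1);
  // Masks from the negative tests: the interleaved walk breaks down.
  assert(!isZipMask({1, 2}, 2, Which));       // zip2_no_combine_idx_mismatch
  assert(!isZipMask({3, 4, 1, 5}, 4, Which)); // bad first mask element
  return 0;
}

The WhichResult out-parameter is what matchZip translates into AArch64::G_ZIP1 or AArch64::G_ZIP2; in the negative cases the rule simply does not fire, which is why those tests check that the G_SHUFFLE_VECTOR is left untouched.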