diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -24,17 +24,26 @@
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
 }
 
-def zip_matchdata : GIDefMatchData<"unsigned">;
+// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
+// target-specific opcode.
+def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
+
+def rev : GICombineRule<
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchREV(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
 def zip : GICombineRule<
-  (defs root:$root, zip_matchdata:$matchinfo),
+  (defs root:$root, shuffle_matchdata:$matchinfo),
   (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
          [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
-def uzp_matchdata : GIDefMatchData<"unsigned">;
 def uzp : GICombineRule<
-  (defs root:$root, uzp_matchdata:$matchinfo),
+  (defs root:$root, shuffle_matchdata:$matchinfo),
   (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
          [{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
@@ -42,7 +51,7 @@
 
 // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
 // instruction.
-def shuffle_vector_pseudos : GICombineGroup<[zip, uzp]>;
+def shuffle_vector_pseudos : GICombineGroup<[rev, zip, uzp]>;
 
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -25,6 +25,27 @@
   let hasSideEffects = 0;
 }
 
+// Pseudo for a rev16 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV16 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+}
+
+// Pseudo for a rev32 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV32 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+}
+
+// Pseudo for a rev64 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV64 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+}
+
 // Represents an uzp1 instruction. Produced post-legalization from
 // G_SHUFFLE_VECTORs with appropriate masks.
 def G_UZP1 : AArch64GenericInstruction {
@@ -53,6 +74,9 @@
   let InOperandList = (ins type0:$v1, type0:$v2);
 }
 
+def : GINodeEquiv<G_REV16, AArch64rev16>;
+def : GINodeEquiv<G_REV32, AArch64rev32>;
+def : GINodeEquiv<G_REV64, AArch64rev64>;
 def : GINodeEquiv<G_UZP1, AArch64uzp1>;
 def : GINodeEquiv<G_UZP2, AArch64uzp2>;
 def : GINodeEquiv<G_ZIP1, AArch64zip1>;
diff --git a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
@@ -28,6 +28,48 @@
 
 using namespace llvm;
 
+/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
+///
+/// Used for matching target-supported shuffles before codegen.
+struct ShuffleVectorPseudo {
+  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
+  Register Dst;                 ///< Destination register.
+  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
+  ShuffleVectorPseudo(unsigned Opc, Register Dst,
+                      std::initializer_list<SrcOp> SrcOps)
+      : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
+  ShuffleVectorPseudo() {}
+};
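+
+// For instance, the zip combine below records a two-source match as
+// ShuffleVectorPseudo(AArch64::G_ZIP1, Dst, {V1, V2}), while the rev
+// combine records a single-source match as
+// ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src}).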
+
+/// Check if a vector shuffle corresponds to a REV instruction with the
+/// specified blocksize.
+static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
+                      unsigned BlockSize) {
+  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+         "Only possible block sizes for REV are: 16, 32, 64");
+  assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
+
+  unsigned BlockElts = M[0] + 1;
+
+  // If the first shuffle index is UNDEF, be optimistic.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSize;
+
+  if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
+    return false;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    // Ignore undef indices.
+    if (M[i] < 0)
+      continue;
+    if (static_cast<unsigned>(M[i]) !=
+        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+      return false;
+  }
+
+  return true;
+}
+
 /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
 /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
 static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
@@ -62,41 +104,78 @@
   return true;
 }
 
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
+/// G_REV instruction. Returns the appropriate G_REV opcode and source operand
+/// in \p MatchInfo.
+static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(Dst);
+  unsigned EltSize = Ty.getScalarSizeInBits();
+
+  // Element size for a rev cannot be 64.
+  if (EltSize == 64)
+    return false;
+
+  unsigned NumElts = Ty.getNumElements();
+
+  // Try to produce G_REV64.
+  if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
+    MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
+    return true;
+  }
+
+  // TODO: Produce G_REV32 and G_REV16 once we have proper legalization
+  // support. This should be identical to the above, but with a constant 32
+  // and constant 16.
+  return false;
+}
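+
+// E.g. a <2 x s32> shuffle with mask (1, 0): EltSize is 32, so a 64-bit
+// block holds BlockElts = 2 elements. isREVMask requires index 0 to be
+// (0 - 0) + (2 - 1 - 0) = 1 and index 1 to be (1 - 1) + (2 - 1 - 1) = 0,
+// which this mask satisfies, so the shuffle is rewritten to a G_REV64 of
+// the first source.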
+
 /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
 /// a G_UZP1 or G_UZP2 instruction.
 ///
 /// \param [in] MI - The shuffle vector instruction.
-/// \param [out] Opc - Either G_UZP1 or G_UZP2 on success.
+/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2, with operands, on
+/// success.
 static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     unsigned &Opc) {
+                     ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
   unsigned WhichResult;
   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
-  unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+  Register Dst = MI.getOperand(0).getReg();
+  unsigned NumElts = MRI.getType(Dst).getNumElements();
   if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
     return false;
-  Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
   return true;
 }
 
 static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     unsigned &Opc) {
+                     ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
   unsigned WhichResult;
   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
-  unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+  Register Dst = MI.getOperand(0).getReg();
+  unsigned NumElts = MRI.getType(Dst).getNumElements();
   if (!isZipMask(ShuffleMask, NumElts, WhichResult))
     return false;
-  Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
   return true;
 }
 
 /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
-/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
-static bool applyShuffleVectorPseudo(MachineInstr &MI, unsigned Opc) {
+/// \p MatchInfo carries the opcode and operands to use. \p MI is the
+/// G_SHUFFLE_VECTOR.
+static bool applyShuffleVectorPseudo(MachineInstr &MI,
+                                     ShuffleVectorPseudo &MatchInfo) {
   MachineIRBuilder MIRBuilder(MI);
-  MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
-                        {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
   MI.eraseFromParent();
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-rev.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-rev.mir
@@ -0,0 +1,85 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test producing a G_REV from an appropriate G_SHUFFLE_VECTOR.
+
+...
+---
+name: rev64_mask_1_0
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: rev64_mask_1_0
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
+    ; CHECK: $d0 = COPY [[REV64_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 0)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: rev64_mask_1_undef
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: rev64_mask_1_undef
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
+    ; CHECK: $d0 = COPY [[REV64_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, undef)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
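+# In rev64_mask_1_undef above, the first mask index is defined, so isREVMask
+# takes BlockElts from M[0] + 1 = 2; the undef at index 1 is then skipped by
+# the per-index check, and the shuffle still matches G_REV64.
+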
+---
+name: no_rev64_mask_1
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; Verify that we don't produce a G_REV64 when
+    ;
+    ;   M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)
+    ;
+    ; In this example, BlockElts = 2.
+    ;
+    ; At i = 1:
+    ;   M[i] = 3
+    ;   i % BlockElts = i % 2 = 1
+    ;
+    ; So
+    ;
+    ;   3 != (1 - 1) + (2 - 1 - 1)
+    ;   3 != 0
+    ;
+    ; And so we should not produce a G_REV64.
+    ;
+    ; CHECK-LABEL: name: no_rev64_mask_1
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir
@@ -0,0 +1,239 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test selecting G_REV instructions.
+#
+# Each test is named like:
+#
+#   (G_REV_VERSION)_(INSTRUCTION_PRODUCED)
+#
+# Each of these patterns comes from AArch64GenGlobalISel.inc.
+#
+
+...
+---
+name: rev64_REV64v2i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev64_REV64v2i32
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV64v2i32 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<2 x s32>) = COPY $d0
+    %rev:fpr(<2 x s32>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev64_REV64v4i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev64_REV64v4i16
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV64v4i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %rev:fpr(<4 x s16>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev64_REV64v4i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v4i32
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV64v4i32 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<4 x s32>) = COPY $q0
+    %rev:fpr(<4 x s32>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev64_REV64v8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v8i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV64v8i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %rev:fpr(<8 x s8>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
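+# Note that G_REV64 has no hand-written selection patterns of its own; the
+# GINodeEquiv defs map G_REV16/G_REV32/G_REV64 onto the AArch64rev16/32/64
+# nodes, so the REV instructions in these tests come from the imported SDAG
+# patterns in AArch64GenGlobalISel.inc.
+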
+---
+name: rev64_REV64v8i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v8i16
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV64v8i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %rev:fpr(<8 x s16>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev64_REV64v16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v16i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV64v16i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %rev:fpr(<16 x s8>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev32_REV32v4i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev32_REV32v4i16
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV32v4i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %rev:fpr(<4 x s16>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev32_REV32v8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev32_REV32v8i8
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV32v8i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %rev:fpr(<8 x s8>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev32_REV32v8i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev32_REV32v8i16
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV32v8i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %rev:fpr(<8 x s16>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev32_REV32v16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev32_REV32v16i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV32v16i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %rev:fpr(<16 x s8>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev16_REV16v8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev16_REV16v8i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV16v8i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %rev:fpr(<8 x s8>) = G_REV16 %copy
+    RET_ReallyLR
+
+...
+---
+name: rev16_REV16v16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev16_REV16v16i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV16v16i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %rev:fpr(<16 x s8>) = G_REV16 %copy
+    RET_ReallyLR
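
The TODO in matchREV notes that G_REV32 and G_REV16 should be handled the same way once legalization supports them. A minimal sketch of what that follow-up could look like, reusing matchREV's locals (ShuffleMask, EltSize, NumElts, Dst, Src) after its existing EltSize == 64 early-out; this loop is hypothetical and not part of this patch:

  // Hypothetical replacement for the tail of matchREV: try each REV block
  // size from widest to narrowest and record the matching pseudo. isREVMask
  // rejects any BlockSize <= EltSize, so narrower element types simply fall
  // through to the next candidate.
  for (unsigned BlockSize : {64u, 32u, 16u}) {
    if (!isREVMask(ShuffleMask, EltSize, NumElts, BlockSize))
      continue;
    unsigned Opc = BlockSize == 64   ? AArch64::G_REV64
                   : BlockSize == 32 ? AArch64::G_REV32
                                     : AArch64::G_REV16;
    MatchInfo = ShuffleVectorPseudo(Opc, Dst, {Src});
    return true;
  }
  return false;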