Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -49,9 +49,16 @@
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
+def trn : GICombineRule<
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchTRN(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
 // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
 // instruction.
-def shuffle_vector_pseudos : GICombineGroup<[rev, zip, uzp]>;
+def shuffle_vector_pseudos : GICombineGroup<[rev, zip, uzp, trn]>;
 
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
Index: llvm/lib/Target/AArch64/AArch64InstrGISel.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -74,6 +74,20 @@
   let InOperandList = (ins type0:$v1, type0:$v2);
 }
 
+// Represents a trn1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_TRN1 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
+// Represents a trn2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_TRN2 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
 def : GINodeEquiv<G_REV16, AArch64rev16>;
 def : GINodeEquiv<G_REV32, AArch64rev32>;
 def : GINodeEquiv<G_REV64, AArch64rev64>;
@@ -81,3 +95,5 @@
 def : GINodeEquiv<G_UZP2, AArch64uzp2>;
 def : GINodeEquiv<G_ZIP1, AArch64zip1>;
 def : GINodeEquiv<G_ZIP2, AArch64zip2>;
+def : GINodeEquiv<G_TRN1, AArch64trn1>;
+def : GINodeEquiv<G_TRN2, AArch64trn2>;
Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -70,6 +70,25 @@
   return true;
 }
 
+/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
+/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
+static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
+                      unsigned &WhichResult) {
+  // Lanes are checked in pairs below, so an odd element count cannot match.
+  if (NumElts % 2 != 0)
+    return false;
+  // A leading index of 0 selects TRN1; anything else (including an undef
+  // leading index) selects TRN2.
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i < NumElts; i += 2) {
+    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
+        (M[i + 1] >= 0 &&
+         static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
+      return false;
+  }
+  return true;
+}
+
 /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
 /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
 static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
@@ -133,6 +152,24 @@
   return false;
 }
 
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_TRN1 or G_TRN2 instruction.
+static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  unsigned WhichResult;
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  Register Dst = MI.getOperand(0).getReg();
+  unsigned NumElts = MRI.getType(Dst).getNumElements();
+  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
+    return false;
+  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+  return true;
+}
+
 /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
 /// a G_UZP1 or G_UZP2 instruction.
 ///
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir
@@ -0,0 +1,234 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check that we produce G_TRN1 or G_TRN2 when we have an appropriate shuffle
+# mask.
+#
+
+...
+---
+name: trn1_v8s8
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: trn1_v8s8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, 2, 10, 4, 12, 6, 14)
+    $d0 = COPY %2(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: trn2_v8s8
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: trn2_v8s8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, 9, 3, 11, 5, 13, 7, 15)
+    $d0 = COPY %2(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: trn1_v16s8
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: trn1_v16s8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[TRN1_:%[0-9]+]]:_(<16 x s8>) = G_TRN1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[TRN1_]](<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<16 x s8>) = COPY $q0
+    %1:_(<16 x s8>) = COPY $q1
+    %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30)
+    $q0 = COPY %2(<16 x s8>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: trn2_v16s8
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: trn2_v16s8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[TRN2_:%[0-9]+]]:_(<16 x s8>) = G_TRN2 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[TRN2_]](<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<16 x s8>) = COPY $q0
+    %1:_(<16 x s8>) = COPY $q1
+    %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31)
+    $q0 = COPY %2(<16 x s8>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: trn1_v4s32
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: trn1_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[TRN1_:%[0-9]+]]:_(<4 x s32>) = G_TRN1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[TRN1_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4, 2, 6)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: trn2_v4s32
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: trn2_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[TRN2_:%[0-9]+]]:_(<4 x s32>) = G_TRN2 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[TRN2_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 5, 3, 7)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: redundant_with_zip1
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP.
+    ;
+    ; CHECK-LABEL: name: redundant_with_zip1
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[ZIP1_:%[0-9]+]]:_(<2 x s32>) = G_ZIP1 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ZIP1_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 2)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: redundant_with_zip2
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP.
+    ;
+    ; CHECK-LABEL: name: redundant_with_zip2
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: trn1_undef
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; Undef shuffle indices should not prevent matching to G_TRN1.
+    ;
+    ; CHECK-LABEL: name: trn1_undef
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, -1, -1, 4, 12, 6, 14)
+    $d0 = COPY %2(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: trn2_undef
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; Undef shuffle indices should not prevent matching to G_TRN2.
+    ;
+    ; CHECK-LABEL: name: trn2_undef
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, -1, 3, 11, 5, 13, -1, -1)
+    $d0 = COPY %2(<8 x s8>)
+    RET_ReallyLR implicit $d0
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir
@@ -0,0 +1,300 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test that we can select G_TRN1 and G_TRN2.
+#
+# Each testcase is named based off of the instruction which should be selected.
+
+...
+---
+name: TRN1v2i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: TRN1v2i32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[TRN1v2i32_:%[0-9]+]]:fpr64 = TRN1v2i32 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = COPY $d1
+    %2:fpr(<2 x s32>) = G_TRN1 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN1v2i64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN1v2i64
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN1v2i64_:%[0-9]+]]:fpr128 = TRN1v2i64 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<2 x s64>) = COPY $q0
+    %1:fpr(<2 x s64>) = COPY $q1
+    %2:fpr(<2 x s64>) = G_TRN1 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN1v4i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: TRN1v4i16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[TRN1v4i16_:%[0-9]+]]:fpr64 = TRN1v4i16 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:fpr(<4 x s16>) = COPY $d1
+    %2:fpr(<4 x s16>) = G_TRN1 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN1v4i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN1v4i32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN1v4i32_:%[0-9]+]]:fpr128 = TRN1v4i32 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = COPY $q1
+    %2:fpr(<4 x s32>) = G_TRN1 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN1v8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: TRN1v8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[TRN1v8i8_:%[0-9]+]]:fpr64 = TRN1v8i8 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<8 x s8>) = COPY $d0
+    %1:fpr(<8 x s8>) = COPY $d1
+    %2:fpr(<8 x s8>) = G_TRN1 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN1v8i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN1v8i16
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN1v8i16_:%[0-9]+]]:fpr128 = TRN1v8i16 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<8 x s16>) = COPY $q0
+    %1:fpr(<8 x s16>) = COPY $q1
+    %2:fpr(<8 x s16>) = G_TRN1 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN1v16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN1v16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN1v16i8_:%[0-9]+]]:fpr128 = TRN1v16i8 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<16 x s8>) = COPY $q0
+    %1:fpr(<16 x s8>) = COPY $q1
+    %2:fpr(<16 x s8>) = G_TRN1 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN2v2i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: TRN2v2i32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[TRN2v2i32_:%[0-9]+]]:fpr64 = TRN2v2i32 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = COPY $d1
+    %2:fpr(<2 x s32>) = G_TRN2 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN2v2i64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN2v2i64
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN2v2i64_:%[0-9]+]]:fpr128 = TRN2v2i64 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<2 x s64>) = COPY $q0
+    %1:fpr(<2 x s64>) = COPY $q1
+    %2:fpr(<2 x s64>) = G_TRN2 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN2v4i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: TRN2v4i16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[TRN2v4i16_:%[0-9]+]]:fpr64 = TRN2v4i16 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:fpr(<4 x s16>) = COPY $d1
+    %2:fpr(<4 x s16>) = G_TRN2 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN2v4i32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN2v4i32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN2v4i32_:%[0-9]+]]:fpr128 = TRN2v4i32 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = COPY $q1
+    %2:fpr(<4 x s32>) = G_TRN2 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN2v8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: TRN2v8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[TRN2v8i8_:%[0-9]+]]:fpr64 = TRN2v8i8 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<8 x s8>) = COPY $d0
+    %1:fpr(<8 x s8>) = COPY $d1
+    %2:fpr(<8 x s8>) = G_TRN2 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN2v8i16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN2v8i16
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN2v8i16_:%[0-9]+]]:fpr128 = TRN2v8i16 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<8 x s16>) = COPY $q0
+    %1:fpr(<8 x s16>) = COPY $q1
+    %2:fpr(<8 x s16>) = G_TRN2 %0, %1
+    RET_ReallyLR
+
+...
+---
+name: TRN2v16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: TRN2v16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[TRN2v16i8_:%[0-9]+]]:fpr128 = TRN2v16i8 [[COPY]], [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:fpr(<16 x s8>) = COPY $q0
+    %1:fpr(<16 x s8>) = COPY $q1
+    %2:fpr(<16 x s8>) = G_TRN2 %0, %1
+    RET_ReallyLR