Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -56,9 +56,16 @@
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
+def ext: GICombineRule <
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchEXT(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyEXT(*${root}, ${matchinfo}); }])
+>;
+
 // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
 // instruction.
-def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp]>;
+def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp]>;
 
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
Index: llvm/lib/Target/AArch64/AArch64InstrGISel.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -81,6 +81,13 @@
   let InOperandList = (ins type1:$lane);
 }
 
+// Represents an ext instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_EXT: AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
+}
+
 def : GINodeEquiv<G_REV16, AArch64rev16>;
 def : GINodeEquiv<G_REV32, AArch64rev32>;
 def : GINodeEquiv<G_REV64, AArch64rev64>;
@@ -89,3 +96,4 @@
 def : GINodeEquiv<G_ZIP1, AArch64zip1>;
 def : GINodeEquiv<G_ZIP2, AArch64zip2>;
 def : GINodeEquiv<G_DUP, AArch64dup>;
+def : GINodeEquiv<G_EXT, AArch64ext>;
Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -95,6 +95,48 @@
   return true;
 }
 
+/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
+/// sources of the shuffle are different.
+static Optional<std::pair<bool, uint64_t>> isEXTMask(ArrayRef<int> M,
+                                                     unsigned NumElts) {
+  // Look for the first non-undef element.
+  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
+  if (FirstRealElt == M.end())
+    return None;
+
+  // Use APInt to handle overflow when calculating expected element.
+  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
+  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
+
+  // The following shuffle indices must be the successive elements after the
+  // first real element.
+  if (any_of(
+          make_range(std::next(FirstRealElt), M.end()),
+          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
+    return None;
+
+  // The index of an EXT is the first element if it is not UNDEF.
+  // Watch out for the beginning UNDEFs. The EXT index should be the expected
+  // value of the first element. E.g.
+  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
+  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
+  // ExpectedElt is the last mask index plus 1.
+  uint64_t Imm = ExpectedElt.getZExtValue();
+  bool ReverseExt = false;
+
+  // There are two different cases that require reversing the input vectors.
+  // For example, for vector <4 x i32> we have the following cases,
+  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
+  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
+  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
+  // reversing the two input vectors.
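+  // A worked example (matching the v8s8_cst5 test below): with NumElts = 8
+  // and mask <13, 14, 15, 0, 1, 2, 3, 4>, ExpectedElt wraps modulo
+  // 2 * NumElts and finishes at 5. Since 5 < NumElts, the two inputs must be
+  // swapped and the EXT index is 5.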
+  if (Imm < NumElts)
+    ReverseExt = true;
+  else
+    Imm -= NumElts;
+  return std::make_pair(ReverseExt, Imm);
+}
+
 /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
 /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
 static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
@@ -242,6 +284,27 @@
   return true;
 }
 
+static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  Register Dst = MI.getOperand(0).getReg();
+  auto ExtInfo = isEXTMask(MI.getOperand(3).getShuffleMask(),
+                           MRI.getType(Dst).getNumElements());
+  if (!ExtInfo)
+    return false;
+  bool ReverseExt;
+  uint64_t Imm;
+  std::tie(ReverseExt, Imm) = *ExtInfo;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  if (ReverseExt)
+    std::swap(V1, V2);
+  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+  Imm *= ExtFactor;
+  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
+  return true;
+}
+
 /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
 /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
 static bool applyShuffleVectorPseudo(MachineInstr &MI,
@@ -252,6 +315,21 @@
   return true;
 }
 
+/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
+/// Special-cased because the constant operand must be emitted as a G_CONSTANT
+/// for the imported tablegen patterns to work.
+static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
+  MachineIRBuilder MIRBuilder(MI);
+  // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  Register Cst = MRI.createGenericVirtualRegister(LLT::scalar(32));
+  MIRBuilder.buildConstant(Cst, MatchInfo.SrcOps[2].getImm());
+  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+                        {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir
@@ -0,0 +1,258 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check that we can combine a G_SHUFFLE_VECTOR into a G_EXT.
+
+...
+---
+name: v8s8_cst3
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: v8s8_cst3
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(3, 4, 5, 6, 7, 8, 9, 10)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
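+# Note that the G_EXT immediate counts bytes, not lanes: matchEXT scales the
+# lane index derived from the mask by the element size in bytes. E.g. a lane
+# index of 3 on <4 x s16> becomes an immediate of 6 (see v4s16_cst6 below).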
+---
+name: v8s8_cst5
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: v8s8_cst5
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(13, 14, 15, 0, 1, 2, 3, 4)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: v16s8_cst3
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: v16s8_cst3
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK: %v2:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %v2:_(<16 x s8>) = COPY $q1
+    %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)
+    $q0 = COPY %shuf(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: v16s8_cst7
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: v16s8_cst7
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK: %v2:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %v2:_(<16 x s8>) = COPY $q1
+    %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6)
+    $q0 = COPY %shuf(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: v4s16_cst6
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: v4s16_cst6
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<4 x s16>) = COPY $d0
+    ; CHECK: %v2:_(<4 x s16>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK: %shuf:_(<4 x s16>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<4 x s16>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<4 x s16>) = COPY $d0
+    %v2:_(<4 x s16>) = COPY $d1
+    %shuf:_(<4 x s16>) = G_SHUFFLE_VECTOR %v1(<4 x s16>), %v2, shufflemask(3, 4, 5, 6)
+    $d0 = COPY %shuf(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: v4s32_cst12
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: v4s32_cst12
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<4 x s32>) = COPY $q0
+    ; CHECK: %v2:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: %shuf:_(<4 x s32>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<4 x s32>) = COPY $q0
+    %v2:_(<4 x s32>) = COPY $q1
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1(<4 x s32>), %v2, shufflemask(3, 4, 5, 6)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: undef_elts_should_match_1
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; Undef shuffle indices should not prevent matching G_EXT.
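+    ; The -1 entries at indices 1 and 2 are treated as the expected values 4
+    ; and 5, so the mask behaves like (3, 4, 5, 6, 7, 8, 9, 10).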
+    ; We should get a constant 3 here.
+    ;
+    ; CHECK-LABEL: name: undef_elts_should_match_1
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(3, -1, -1, 6, 7, 8, 9, 10)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: undef_elts_should_match_2
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; Undef shuffle indices should not prevent matching G_EXT.
+    ; We should get a constant 6 here.
+    ;
+    ; CHECK-LABEL: name: undef_elts_should_match_2
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(-1, -1, -1, -1, 2, 3, 4, 5)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: undef_elts_should_match_3
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; Undef shuffle indices should not prevent matching G_EXT.
+    ; We should get a constant 7 here.
+    ; CHECK-LABEL: name: undef_elts_should_match_3
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK: %v2:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %v2:_(<16 x s8>) = COPY $q1
+    %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(23, 24, 25, 26, -1, -1, 29, 30, 31, 0, 1, 2, 3, 4, -1, 6)
+    $q0 = COPY %shuf(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: undef_elts_should_match_4
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; Undef shuffle indices should not prevent matching G_EXT.
+    ; We should get a constant 10 here.
+    ; CHECK-LABEL: name: undef_elts_should_match_4
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<8 x s16>) = COPY $q0
+    ; CHECK: %v2:_(<8 x s16>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK: %shuf:_(<8 x s16>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<8 x s16>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<8 x s16>) = COPY $q0
+    %v2:_(<8 x s16>) = COPY $q1
+    %shuf:_(<8 x s16>) = G_SHUFFLE_VECTOR %v1(<8 x s16>), %v2, shufflemask(-1, -1, -1, -1, 1, 2, 3, 4)
+    $q0 = COPY %shuf(<8 x s16>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: all_undef
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; We expect at least one defined element in the shuffle mask.
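+    ; With no defined elements, isEXTMask returns None and the ext combine
+    ; cannot fire; the rev combine matches this mask instead.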
+    ;
+    ; CHECK-LABEL: name: all_undef
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<8 x s16>) = COPY $q0
+    ; CHECK: %shuf:_(<8 x s16>) = G_REV64 %v1
+    ; CHECK: $q0 = COPY %shuf(<8 x s16>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<8 x s16>) = COPY $q0
+    %v2:_(<8 x s16>) = COPY $q1
+    %shuf:_(<8 x s16>) = G_SHUFFLE_VECTOR %v1(<8 x s16>), %v2, shufflemask(-1, -1, -1, -1, -1, -1, -1, -1)
+    $q0 = COPY %shuf(<8 x s16>)
+    RET_ReallyLR implicit $q0
+...
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-ext.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-ext.mir
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test G_EXT selection using AArch64ext patterns.
+
+...
+---
+name: v8s8_EXTv8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: v8s8_EXTv8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:fpr64 = COPY $d0
+    ; CHECK: %v2:fpr64 = COPY $d1
+    ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 3
+    %v1:fpr(<8 x s8>) = COPY $d0
+    %v2:fpr(<8 x s8>) = COPY $d1
+    %3:gpr(s32) = G_CONSTANT i32 3
+    %shuf:fpr(<8 x s8>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v16s8_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v16s8_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 3
+    %v1:fpr(<16 x s8>) = COPY $q0
+    %v2:fpr(<16 x s8>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 3
+    %shuf:fpr(<16 x s8>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v4s16_EXTv8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: v4s16_EXTv8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:fpr64 = COPY $d0
+    ; CHECK: %v2:fpr64 = COPY $d1
+    ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 6
+    %v1:fpr(<4 x s16>) = COPY $d0
+    %v2:fpr(<4 x s16>) = COPY $d1
+    %3:gpr(s32) = G_CONSTANT i32 6
+    %shuf:fpr(<4 x s16>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v8s16_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v8s16_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v2, %v1, 10
+    %v1:fpr(<8 x s16>) = COPY $q0
+    %v2:fpr(<8 x s16>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 10
+    %shuf:fpr(<8 x s16>) = G_EXT %v2, %v1, %3(s32)
+
+...
+---
+name: v4s32_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v4s32_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 12
+    %v1:fpr(<4 x s32>) = COPY $q0
+    %v2:fpr(<4 x s32>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 12
+    %shuf:fpr(<4 x s32>) = G_EXT %v1, %v2, %3(s32)
+
+...
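+# There is no element-wise EXT instruction; every G_EXT is selected to the
+# byte-wise EXTv8i8 or EXTv16i8 depending on whether the vector is 64 or 128
+# bits wide, which is why the v2s32 and v2s64 cases below still use them.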
+---
+name: v2s32_EXTv8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: v2s32_EXTv8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:fpr64 = COPY $d0
+    ; CHECK: %v2:fpr64 = COPY $d1
+    ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 2
+    %v1:fpr(<2 x s32>) = COPY $d0
+    %v2:fpr(<2 x s32>) = COPY $d1
+    %3:gpr(s32) = G_CONSTANT i32 2
+    %shuf:fpr(<2 x s32>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v2s64_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v2s64_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 2
+    %v1:fpr(<2 x s64>) = COPY $q0
+    %v2:fpr(<2 x s64>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 2
+    %shuf:fpr(<2 x s64>) = G_EXT %v1, %v2, %3(s32)
+...