diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -370,22 +371,62 @@
   return false;
 }
 
+// Check if an EXT instruction can handle the shuffle mask when the vector
+// sources of the shuffle are the same.
+static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty, uint64_t &Imm) {
+  unsigned NumElts = Ty.getNumElements();
+
+  // Assume that the first shuffle index is not UNDEF. Fail if it is.
+  if (M[0] < 0)
+    return false;
+
+  Imm = M[0];
+
+  // If this is a VEXT shuffle, the immediate value is the index of the first
+  // element. The other shuffle indices must be the successive elements after
+  // the first one.
+  unsigned ExpectedElt = Imm;
+  for (unsigned I = 1; I < NumElts; ++I) {
+    // Increment the expected index. If it wraps around, just follow it
+    // back to index zero and keep going.
+    ++ExpectedElt;
+    if (ExpectedElt == NumElts)
+      ExpectedElt = 0;
+
+    if (M[I] < 0)
+      continue; // Ignore UNDEF indices.
+    if (ExpectedElt != static_cast<unsigned>(M[I]))
+      return false;
+  }
+
+  return true;
+}
+
 static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
                      ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
   Register Dst = MI.getOperand(0).getReg();
-  auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
-                            MRI.getType(Dst).getNumElements());
-  if (!ExtInfo)
-    return false;
-  bool ReverseExt;
-  uint64_t Imm;
-  std::tie(ReverseExt, Imm) = *ExtInfo;
+  LLT DstTy = MRI.getType(Dst);
   Register V1 = MI.getOperand(1).getReg();
   Register V2 = MI.getOperand(2).getReg();
+  auto Mask = MI.getOperand(3).getShuffleMask();
+  uint64_t Imm;
+  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
+  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+
+  if (!ExtInfo) {
+    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
+        !isSingletonExtMask(Mask, DstTy, Imm))
+      return false;
+
+    Imm *= ExtFactor;
+    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
+    return true;
+  }
+  bool ReverseExt;
+  std::tie(ReverseExt, Imm) = *ExtInfo;
   if (ReverseExt)
     std::swap(V1, V2);
-  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
   Imm *= ExtFactor;
   MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
   return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
@@ -267,3 +267,25 @@
     $q0 = COPY %shuf(<8 x s16>)
     RET_ReallyLR implicit $q0
 ...
+---
+name: v2s64_singleton_ext
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: v2s64_singleton_ext
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_EXT %v1, %v1, [[C]](s32)
+    ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<2 x s64>) = COPY $q0
+    %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 0)
+    $q0 = COPY %shuf(<2 x s64>)
+    RET_ReallyLR implicit $q0
+...
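
Note on the immediate: in the singleton case the EXT byte immediate is the first mask index scaled by the element size in bytes, which is why the v2s64 test with shufflemask(1, 0) expects G_CONSTANT i32 8 (index 1 * 8 bytes). Below is a minimal standalone sketch of that check and computation, illustrative only and not part of the patch; the helper name isRotationMask and the main driver are made up for the example.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch of the singleton-EXT mask check: the mask must rotate the single
    // source, so every defined index must be the previous expected index plus
    // one, wrapping around at the element count.
    static bool isRotationMask(const std::vector<int> &M, unsigned NumElts,
                               uint64_t &Imm) {
      if (M[0] < 0)
        return false; // The first index must be defined.
      Imm = M[0];
      unsigned ExpectedElt = Imm;
      for (unsigned I = 1; I < NumElts; ++I) {
        ExpectedElt = (ExpectedElt + 1) % NumElts;
        if (M[I] >= 0 && ExpectedElt != static_cast<unsigned>(M[I]))
          return false; // Defined indices must follow the rotation.
      }
      return true;
    }

    int main() {
      // <2 x s64>, mask (1, 0): a rotation by one element. Elements are
      // 64 bits = 8 bytes, so the EXT immediate is 1 * 8 = 8, matching the
      // G_CONSTANT i32 8 expected by the MIR test above.
      std::vector<int> Mask = {1, 0};
      uint64_t Imm = 0;
      assert(isRotationMask(Mask, /*NumElts=*/2, Imm));
      uint64_t ExtFactor = 64 / 8; // scalar size in bits / 8
      assert(Imm * ExtFactor == 8);
      return 0;
    }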