diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -370,22 +371,60 @@
   return false;
 }
 
+// Check if an EXT instruction can handle the shuffle mask when the vector
+// sources of the shuffle are the same.
+static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
+  unsigned NumElts = Ty.getNumElements();
+
+  // Assume that the first shuffle index is not UNDEF. Fail if it is.
+  if (M[0] < 0)
+    return false;
+
+  // If this is a VEXT shuffle, the immediate value is the index of the first
+  // element. The other shuffle indices must be the successive elements after
+  // the first one.
+  unsigned ExpectedElt = M[0];
+  for (unsigned I = 1; I < NumElts; ++I) {
+    // Increment the expected index. If it wraps around, just follow it
+    // back to index zero and keep going.
+    ++ExpectedElt;
+    if (ExpectedElt == NumElts)
+      ExpectedElt = 0;
+
+    if (M[I] < 0)
+      continue; // Ignore UNDEF indices.
+    if (ExpectedElt != static_cast<unsigned>(M[I]))
+      return false;
+  }
+
+  return true;
+}
+
 static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
                      ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
   Register Dst = MI.getOperand(0).getReg();
-  auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
-                            MRI.getType(Dst).getNumElements());
-  if (!ExtInfo)
-    return false;
-  bool ReverseExt;
-  uint64_t Imm;
-  std::tie(ReverseExt, Imm) = *ExtInfo;
+  LLT DstTy = MRI.getType(Dst);
   Register V1 = MI.getOperand(1).getReg();
   Register V2 = MI.getOperand(2).getReg();
+  auto Mask = MI.getOperand(3).getShuffleMask();
+  uint64_t Imm;
+  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
+  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+
+  if (!ExtInfo) {
+    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
+        !isSingletonExtMask(Mask, DstTy))
+      return false;
+
+    Imm = Mask[0] * ExtFactor;
+    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
+    return true;
+  }
+  bool ReverseExt;
+  std::tie(ReverseExt, Imm) = *ExtInfo;
   if (ReverseExt)
     std::swap(V1, V2);
-  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
   Imm *= ExtFactor;
   MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
   return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
@@ -267,3 +267,69 @@
     $q0 = COPY %shuf(<8 x s16>)
     RET_ReallyLR implicit $q0
 ...
+---
+name:            v2s64_singleton_ext
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: v2s64_singleton_ext
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_EXT %v1, %v1, [[C]](s32)
+    ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<2 x s64>) = COPY $q0
+    %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 0)
+    $q0 = COPY %shuf(<2 x s64>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            v2s64_singleton_ext_all_undef
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: v2s64_singleton_ext_all_undef
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_ZIP2 %v1, %v2
+    ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<2 x s64>) = COPY $q0
+    %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(undef, undef)
+    $q0 = COPY %shuf(<2 x s64>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            v2s64_singleton_ext_same
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: v2s64_singleton_ext_same
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
+    ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<2 x s64>) = COPY $q0
+    %v2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %shuf:_(<2 x s64>) = G_SHUFFLE_VECTOR %v1(<2 x s64>), %v2, shufflemask(1, 1)
+    $q0 = COPY %shuf(<2 x s64>)
+    RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir
@@ -16,11 +16,12 @@
 
     ; CHECK-LABEL: name: uzp1_v4s32
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[UZP1_]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = COPY $q1
     %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 2, 4, 6)
@@ -38,15 +39,16 @@
 
     ; CHECK-LABEL: name: uzp2_v4s32
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
-    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[UZP2_]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = COPY $q1
-    %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
-    $q0 = COPY %1(<4 x s32>)
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
+    $q0 = COPY %2(<4 x s32>)
     RET_ReallyLR implicit $q0
 ...
@@ -62,11 +64,12 @@
 
     ; CHECK-LABEL: name: no_uzp1
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
-    ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
+    ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = COPY $q1
     %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4, 6)
@@ -86,11 +89,12 @@
 
     ; CHECK-LABEL: name: no_uzp2
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
-    ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
+    ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = COPY $q1
     %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 4, 5, 7)
@@ -110,11 +114,12 @@
 
     ; CHECK-LABEL: name: uzp1_undef
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[UZP1_]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = COPY $q1
     %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, -1, 4, 6)
@@ -134,13 +139,14 @@
 
     ; CHECK-LABEL: name: uzp2_undef
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
-    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
-    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[UZP2_]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = COPY $q1
-    %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
-    $q0 = COPY %1(<4 x s32>)
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
+    $q0 = COPY %2(<4 x s32>)
     RET_ReallyLR implicit $q0