diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -213,5 +213,10 @@ LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy); +/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat. +/// If \p MI is not a splat, returns None. +Optional<int> getSplatIndex(MachineInstr &MI); + + } // End namespace llvm. #endif diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -604,3 +604,24 @@ unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize); return LLT::scalar(GCD); } + +Optional<int> llvm::getSplatIndex(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Only G_SHUFFLE_VECTOR can have a splat index!"); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; }); + + // If all elements are undefined, this shuffle can be considered a splat. + // Return 0 for better potential for callers to simplify. + if (FirstDefinedIdx == Mask.end()) + return 0; + + // Make sure all remaining elements are either undef or the same + // as the first non-undef value.
+ int SplatValue = *FirstDefinedIdx; + if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), + [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) + return None; + + return SplatValue; +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -133,6 +133,8 @@ bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool tryOptShuffleDupLane(MachineInstr &I, LLT DstTy, LLT SrcTy, ArrayRef<int> Mask, + MachineRegisterInfo &MRI) const; bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const; @@ -4306,6 +4308,67 @@ return &*CmpMI; } +bool AArch64InstructionSelector::tryOptShuffleDupLane( + MachineInstr &I, LLT DstTy, LLT SrcTy, ArrayRef<int> Mask, + MachineRegisterInfo &MRI) const { + assert(I.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + + // We assume that scalar->vector splats have been handled in the + // post-legalizer combiner to G_DUP. However splats of a source vector's + // lane don't fit that pattern, detect it here: + // %res = G_SHUFFLE_VECTOR %src:<n x ty>, undef, splat(lane-idx) + // => + // %res = DUPv[N][Ty]lane %src, lane-idx + // FIXME: this case should be covered by re-implementing the perfect shuffle + // codegen mechanism. + + auto LaneIdx = getSplatIndex(I); + if (!LaneIdx) + return false; + + // The lane idx should be within the first source vector.
+ if (*LaneIdx >= SrcTy.getNumElements()) + return false; + + if (DstTy != SrcTy) + return false; + + LLT ScalarTy = SrcTy.getElementType(); + unsigned ScalarSize = ScalarTy.getSizeInBits(); + + unsigned Opc = 0; + switch (SrcTy.getNumElements()) { + case 2: + if (ScalarSize == 64) + Opc = AArch64::DUPv2i64lane; + break; + case 4: + if (ScalarSize == 32) + Opc = AArch64::DUPv4i32lane; + break; + case 8: + if (ScalarSize == 16) + Opc = AArch64::DUPv8i16lane; + break; + case 16: + if (ScalarSize == 8) + Opc = AArch64::DUPv16i8lane; + break; + default: + break; + } + if (!Opc) + return false; + + MachineIRBuilder MIB(I); + auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, + {I.getOperand(1).getReg()}) + .addImm(*LaneIdx); + constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI); + I.eraseFromParent(); + return true; +} + bool AArch64InstructionSelector::selectShuffleVector( MachineInstr &I, MachineRegisterInfo &MRI) const { const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); @@ -4327,6 +4390,9 @@ return false; } + if (tryOptShuffleDupLane(I, DstTy, Src1Ty, Mask, MRI)) + return true; + unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; SmallVector<Constant *, 64> CstIdxs; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -43,29 +44,6 @@ ShuffleVectorPseudo() {} }; -/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat. -/// If \p MI is not a splat, returns None.
-static Optional<int> getSplatIndex(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Only G_SHUFFLE_VECTOR can have a splat index!"); - ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; }); - - // If all elements are undefined, this shuffle can be considered a splat. - // Return 0 for better potential for callers to simplify. - if (FirstDefinedIdx == Mask.end()) - return 0; - - // Make sure all remaining elements are either undef or the same - // as the first non-undef value. - int SplatValue = *FirstDefinedIdx; - if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), - [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) - return None; - - return SplatValue; -} - /// Check if a vector shuffle corresponds to a REV instruction with the /// specified blocksize. static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-to-duplane.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-to-duplane.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-to-duplane.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +...
+--- +name: duplane_v16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: duplane_v16i8 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[DUPv16i8lane:%[0-9]+]]:fpr128 = DUPv16i8lane [[COPY]], 0 + ; CHECK: $q0 = COPY [[DUPv16i8lane]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<16 x s8>) = COPY $q0 + %2:fpr(<16 x s8>) = G_IMPLICIT_DEF + %1:fpr(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + $q0 = COPY %1(<16 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: duplane_v8i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: duplane_v8i16 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY]], 0 + ; CHECK: $q0 = COPY [[DUPv8i16lane]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<8 x s16>) = COPY $q0 + %2:fpr(<8 x s16>) = G_IMPLICIT_DEF + %1:fpr(<8 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %2, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: duplane_v4f32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: duplane_v4f32 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY]], 0 + ; CHECK: $q0 = COPY [[DUPv4i32lane]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<4 x s32>) = COPY $q0 + %2:fpr(<4 x s32>) = G_IMPLICIT_DEF + %1:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %2, shufflemask(0, 0, 0, 0) + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: duplane_v2i64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: duplane_v2i64 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY]], 0 + ; CHECK: $q0 = COPY [[DUPv2i64lane]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<2 x s64>) = COPY $q0 + %2:fpr(<2 x s64>) = G_IMPLICIT_DEF + %1:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, 0) + $q0 = COPY %1(<2 x s64>) + RET_ReallyLR implicit $q0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir @@ -140,7 +140,7 @@ ; CHECK-LABEL: name: shuffle_v2i64 ; CHECK: constants: - ; CHECK: value: '<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>' + ; CHECK: value: '<16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>' ; CHECK: alignment: 16 ; CHECK: isTargetSpecific: false ; CHECK: liveins: $q0, $q1 @@ -154,7 +154,7 @@ ; CHECK: RET_ReallyLR implicit $q0 %0:fpr(<2 x s64>) = COPY $q0 %1:fpr(<2 x s64>) = COPY $q1 - %2:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(0, 0) + %2:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(1, 0) $q0 = COPY %2(<2 x s64>) RET_ReallyLR implicit $q0