diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -958,6 +958,23 @@
   MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
                                             ArrayRef<Register> Ops);
 
+  /// Build and insert a vector splat of a scalar \p Src using a
+  /// G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Src must have the same type as the element type of \p Dst
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src);
+
+  /// Build and insert \p Res = G_SHUFFLE_VECTOR \p Src1, \p Src2, \p Mask
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1,
+                                         const SrcOp &Src2, ArrayRef<int> Mask);
+
   /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
   ///
   /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6217,8 +6217,23 @@
   if (!DstTy.isVector())
     return UnableToLegalize;
 
-  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
+  // Vector selects can have a scalar predicate. If so, splat into a vector and
+  // finish for later legalization attempts to try again.
+  if (MaskTy.isScalar()) {
+    Register MaskElt = MaskReg;
+    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
+      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
+    // Generate a vector splat idiom to be pattern matched later.
+    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+    Observer.changingInstr(MI);
+    MI.getOperand(1).setReg(ShufSplat.getReg(0));
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+
+  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
     return UnableToLegalize;
+  }
 
   auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
   auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -635,6 +635,33 @@
   return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
 }
 
+MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
+                                                        const SrcOp &Src) {
+  LLT DstTy = Res.getLLTTy(*getMRI());
+  LLT SrcTy = Src.getLLTTy(*getMRI());
+  assert(SrcTy == DstTy.getElementType() && "Expected Src to match Dst elt ty");
+  auto UndefVec = buildUndef(DstTy);
+  auto Zero = buildConstant(LLT::scalar(64), 0);
+  auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero);
+  SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
+  return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
+                                                         const SrcOp &Src1,
+                                                         const SrcOp &Src2,
+                                                         ArrayRef<int> Mask) {
+  LLT DstTy = Res.getLLTTy(*getMRI());
+  LLT Src1Ty = Src1.getLLTTy(*getMRI());
+  LLT Src2Ty = Src2.getLLTTy(*getMRI());
+  assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+  assert(DstTy.getElementType() == Src1Ty.getElementType() &&
+         DstTy.getElementType() == Src2Ty.getElementType());
+  ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
+  return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
+      .addShuffleMask(MaskAlloc);
+}
+
 MachineInstrBuilder MachineIRBuilder::buildConcatVectors(const DstOp &Res,
                                                          ArrayRef<Register> Ops) {
   // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -444,7 +444,7 @@
       .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
-      .minScalarEltSameAsIf(isVector(0), 1, 0)
+      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
       .lowerIf(isVector(0));
 
   // Pointer-handling
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -114,3 +114,47 @@
     $q0 = COPY %5(<16 x s8>)
     RET_ReallyLR implicit $q0
 ...
+---
+name: scalar_mask
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT_INREG]](s32), [[C2]](s64)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK: $q0 = COPY [[OR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %3:_(s1) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<4 x s32>) = G_SELECT %3(s1), %1, %5
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
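Note on the new builder API (not part of the patch itself): the splat is
deliberately emitted as a G_IMPLICIT_DEF + G_INSERT_VECTOR_ELT +
G_SHUFFLE_VECTOR idiom, rather than a G_BUILD_VECTOR, so that later stages can
pattern match it, per the comment added to LegalizerHelper.cpp. Below is a
minimal usage sketch mirroring the new scalar-predicate path. It assumes a
positioned MachineIRBuilder `B`, an s1 condition register `CondReg`, and a
<4 x s32> G_SELECT instruction `MI`; these names are illustrative, not taken
from the patch:

  // buildShuffleSplat requires Src to match Dst's element type, so widen
  // the s1 condition first. Sign extension makes each lane all-ones or
  // all-zero, which the later and/or lowering of G_SELECT relies on.
  LLT V4S32 = LLT::vector(4, 32);
  Register Wide = B.buildSExt(LLT::scalar(32), CondReg).getReg(0);
  // Emits G_IMPLICIT_DEF, G_CONSTANT 0, G_INSERT_VECTOR_ELT, and a
  // G_SHUFFLE_VECTOR with an all-zero mask, i.e. shufflemask(0, 0, 0, 0).
  auto Splat = B.buildShuffleSplat(V4S32, Wide);
  // Replace the scalar mask operand of the G_SELECT with the splat; the
  // select is then reported Legalized and revisited with a vector mask.
  MI.getOperand(1).setReg(Splat.getReg(0));

This is exactly the shape checked by the scalar_mask MIR test above: a
G_SEXT_INREG feeding G_INSERT_VECTOR_ELT into a zero-mask G_SHUFFLE_VECTOR.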