diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6217,8 +6217,34 @@
   if (!DstTy.isVector())
     return UnableToLegalize;
 
-  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
+  // Vector selects can have a scalar predicate. If so, splat into a vector and
+  // finish for later legalization attempts to try again.
+  if (MaskTy.isScalar()) {
+    Register MaskElt = MaskReg;
+    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
+      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
+    // Generate a vector splat idiom to be pattern matched later.
+    auto UndefVec = MIRBuilder.buildUndef(DstTy);
+    auto Zero = MIRBuilder.buildConstant(LLT::scalar(64), 0);
+    auto InsElt =
+        MIRBuilder.buildInsertVectorElement(DstTy, UndefVec, MaskElt, Zero);
+    SmallVector<int, 16> ZeroMask;
+    for (int I = 0; I < DstTy.getNumElements(); ++I)
+      ZeroMask.push_back(0);
+    ArrayRef<int> MaskAlloc = MIRBuilder.getMF().allocateShuffleMask(ZeroMask);
+    auto ShufSplat = MIRBuilder
+                         .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy},
+                                     {InsElt, UndefVec})
+                         .addShuffleMask(MaskAlloc);
+    Observer.changingInstr(MI);
+    MI.getOperand(1).setReg(ShufSplat.getReg(0));
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+
+  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
     return UnableToLegalize;
+  }
 
   auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
   auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -444,7 +444,7 @@
       .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
-      .minScalarEltSameAsIf(isVector(0), 1, 0)
+      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
       .lowerIf(isVector(0));
 
   // Pointer-handling
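Note on the two hunks above: when the G_SELECT predicate is scalar, lowerSelect now splats it across the destination vector's lanes and returns Legalized so a later pass expands the resulting all-vector select; correspondingly, the minScalarEltSameAsIf rule is guarded to fire only when the predicate is itself a vector, leaving the scalar-predicate case to lowerIf(isVector(0)). Below is a minimal standalone sketch, in plain C++ rather than the MachineIRBuilder API, of what the emitted splat idiom (G_IMPLICIT_DEF + G_INSERT_VECTOR_ELT + zero-mask G_SHUFFLE_VECTOR) computes; the function name and types are illustrative only, not part of the patch.

// Plain C++ sketch (illustrative, not LLVM API code) of the splat idiom:
// insert the scalar mask into lane 0 of an undef vector, then shuffle with
// an all-zero mask so every result lane reads lane 0.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <std::size_t N>
std::array<std::int32_t, N> splatViaShuffle(std::int32_t MaskElt) {
  std::array<std::int32_t, N> Vec{}; // stands in for G_IMPLICIT_DEF
  Vec[0] = MaskElt;                  // G_INSERT_VECTOR_ELT at index 0
  std::array<std::int32_t, N> Res{};
  for (std::size_t I = 0; I != N; ++I)
    Res[I] = Vec[0];                 // G_SHUFFLE_VECTOR, mask (0, 0, ..., 0)
  return Res;
}

int main() {
  auto Splat = splatViaShuffle<4>(-1); // a sign-extended 'true' predicate
  for (std::int32_t Lane : Splat)
    assert(Lane == -1);
}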
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -114,3 +114,47 @@
     $q0 = COPY %5(<16 x s8>)
     RET_ReallyLR implicit $q0
 ...
+---
+name: scalar_mask
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT_INREG]](s32), [[C2]](s64)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK: $q0 = COPY [[OR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %3:_(s1) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<4 x s32>) = G_SELECT %3(s1), %1, %5
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
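For reference, the pre-existing fallthrough that the splat feeds into (the G_XOR/G_AND/G_OR sequence checked in the test above) lowers the select as Dst = (Op1 & Mask) | (Op2 & ~Mask), which is why the scalar condition is sign-extended first: only an all-ones lane selects every bit of Op1. A standalone C++ sketch of that arithmetic, with illustrative names rather than LLVM API calls:

// Plain C++ sketch (illustrative, not LLVM API code) of the bitwise select
// expansion: Dst = (Op1 & Mask) | (Op2 & ~Mask), applied lane by lane.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <std::size_t N>
std::array<std::uint32_t, N>
lowerSelect(const std::array<std::uint32_t, N> &Mask,
            const std::array<std::uint32_t, N> &Op1,
            const std::array<std::uint32_t, N> &Op2) {
  std::array<std::uint32_t, N> Dst{};
  for (std::size_t I = 0; I != N; ++I)
    Dst[I] = (Op1[I] & Mask[I]) | (Op2[I] & ~Mask[I]); // G_AND, G_XOR+G_AND, G_OR
  return Dst;
}

int main() {
  // All-ones lanes pick Op1; all-zeros lanes pick Op2.
  std::array<std::uint32_t, 4> Mask{0xFFFFFFFFu, 0u, 0xFFFFFFFFu, 0u};
  std::array<std::uint32_t, 4> A{1, 2, 3, 4}, B{5, 6, 7, 8};
  auto R = lowerSelect(Mask, A, B);
  assert(R[0] == 1 && R[1] == 6 && R[2] == 3 && R[3] == 8);
}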