Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7261,32 +7261,32 @@
   Register Op2Reg = MI.getOperand(3).getReg();
   LLT DstTy = MRI.getType(DstReg);
   LLT MaskTy = MRI.getType(MaskReg);
-  LLT Op1Ty = MRI.getType(Op1Reg);

   if (!DstTy.isVector())
     return UnableToLegalize;

-  // Vector selects can have a scalar predicate. If so, splat into a vector and
-  // finish for later legalization attempts to try again.
   if (MaskTy.isScalar()) {
-    // FIXME: We shouldn't be promoting the mask type here, or even the
-    // broadcast. The broadcast should probably be handled as MoreElements, not
-    // lower.
+    // Turn the scalar condition into a vector condition mask.
+
     Register MaskElt = MaskReg;
-    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) {
-      // FIXME: We have no way of knowing if this is FP without the original
-      // boolean's context.
-      MaskElt = MIRBuilder.buildBoolExt(DstTy.getElementType(),
-                                        MaskElt, false).getReg(0);
+
+    // The condition was potentially zero extended before, but we want a sign
+    // extended boolean.
+    if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() &&
+        MaskTy != LLT::scalar(1)) {
+      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
     }
-    // Generate a vector splat idiom to be pattern matched later.
+
+    // Continue the sign extension (or truncate) to match the data type.
+    MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
+                                          MaskElt).getReg(0);
+
+    // Generate a vector splat idiom.
     auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
-    Observer.changingInstr(MI);
-    MI.getOperand(1).setReg(ShufSplat.getReg(0));
-    Observer.changedInstr(MI);
-    return Legalized;
+    MaskReg = ShufSplat.getReg(0);
+    MaskTy = DstTy;
   }

-  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
+  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
     return UnableToLegalize;
   }
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -185,3 +185,150 @@
     %trunc:_(s64) = G_TRUNC %select
     $x0 = COPY %trunc
     RET_ReallyLR implicit $x0
+...
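For reference while reading the new tests below: the lowered form they all check is the mask-splat select idiom, where the boolean becomes an all-ones or all-zeros element, is splatted, and the select turns into (a & mask) | (b & ~mask). A minimal one-lane model in plain C++, illustrative only; the names here are ours, not LLVM API:

#include <cassert>
#include <cstdint>

// One lane of the lowered G_SELECT; mask is 0 or -1 per lane.
static int32_t select_lane(bool cond, int32_t a, int32_t b) {
  int32_t mask = cond ? -1 : 0;    // what G_SEXT_INREG / G_SEXT produce
  return (a & mask) | (b & ~mask); // the G_AND / G_XOR / G_OR sequence
}

int main() {
  assert(select_lane(true, 7, 9) == 7);
  assert(select_lane(false, 7, 9) == 9);
  return 0;
}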
+
+# The select condition has already been zero extended to s32 and
+# needs a sext_inreg to get a vector boolean.
+---
+name: scalar_mask_already_promoted_select_s32_v4s32
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask_already_promoted_select_s32_v4s32
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %3:_(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<4 x s32>) = G_SELECT %3(s32), %1, %5
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
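Why G_SEXT_INREG with width 1 is the right fixup in the test above: after promotion the condition register holds a zero-extended 0 or 1, and sign extending from bit 0 yields the 0 or -1 lane mask the and/or idiom needs. A hypothetical standalone model in C++; the helper name is ours:

#include <cassert>
#include <cstdint>

// Equivalent of G_SEXT_INREG %x, 1 on an s32: replicate bit 0 into all bits.
static int32_t sext_inreg1_s32(int32_t x) {
  return -(x & 1); // bit 0 set -> 0xffffffff, clear -> 0
}

int main() {
  assert(sext_inreg1_s32(1) == -1);
  assert(sext_inreg1_s32(0) == 0);
  assert(sext_inreg1_s32(2) == 0); // only bit 0 matters
  return 0;
}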
+
+# The scalar select condition was zero extended to s32, a width that
+# differs from the vector element type. It needs a sext_inreg, then a
+# sign extension to the full element width.
+---
+name: scalar_mask_select_s32_v2s64
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask_select_s32_v2s64
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: $q0 = COPY [[OR]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<2 x s64>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s64) = G_FCONSTANT double 0.000000e+00
+    %5:_(<2 x s64>) = G_BUILD_VECTOR %6(s64), %6(s64)
+    %3:_(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<2 x s64>) = G_SELECT %3(s32), %1, %5
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
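Together the tests cover each path through the new mask handling: a same-width mask needs only the sext_inreg, a narrower mask additionally needs the widening G_SEXT, and the degenerate s1 mask in the test below needs neither. A sketch of that decision, mirroring the hunk above with plain integers standing in for LLT sizes (an illustration, not the LLVM API):

#include <cassert>

struct MaskOps {
  bool SExtInReg;   // emit G_SEXT_INREG %mask, 1
  bool SExtOrTrunc; // widen/narrow to the element size; equal sizes fold to a COPY
};

static MaskOps maskOpsFor(unsigned MaskBits, unsigned EltBits) {
  MaskOps Ops;
  Ops.SExtInReg = MaskBits <= EltBits && MaskBits != 1;
  Ops.SExtOrTrunc = MaskBits != EltBits;
  return Ops;
}

int main() {
  assert(maskOpsFor(32, 32).SExtInReg && !maskOpsFor(32, 32).SExtOrTrunc);
  assert(maskOpsFor(32, 64).SExtInReg && maskOpsFor(32, 64).SExtOrTrunc);
  assert(!maskOpsFor(1, 1).SExtInReg && !maskOpsFor(1, 1).SExtOrTrunc);
  return 0;
}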
+
+# Check the degenerate case where the selected element size is the same
+# as the condition bitwidth.
+---
+name: select_v4s1_s1
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1, $q2, $w0
+
+    ; CHECK-LABEL: name: select_v4s1_s1
+    ; CHECK: liveins: $q0, $q1, $q2, $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %w0:_(s32) = COPY $w0
+    ; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: %q1:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: %q2:_(<4 x s32>) = COPY $q2
+    ; CHECK-NEXT: %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
+    ; CHECK-NEXT: %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %w0(s32), [[C]]
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s1) = COPY %cmp(s1)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s1), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s1>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s1>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s1>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s1>) = G_AND %vec_cond0, [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s1>) = G_AND %vec_cond1, [[XOR]]
+    ; CHECK-NEXT: %select:_(<4 x s1>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>)
+    ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %w0:_(s32) = COPY $w0
+    %q0:_(<4 x s32>) = COPY $q0
+    %q1:_(<4 x s32>) = COPY $q1
+    %q2:_(<4 x s32>) = COPY $q2
+    %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0, %q1
+    %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0, %q2
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %cmp:_(s1) = G_ICMP intpred(eq), %w0, %2
+    %select:_(<4 x s1>) = G_SELECT %cmp, %vec_cond0, %vec_cond1
+    %zext_select:_(<4 x s32>) = G_ZEXT %select
+    $q0 = COPY %zext_select
+    RET_ReallyLR implicit $q0
+
+...
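The same and/xor/or idiom holds at s1 granularity in the last test, where the xor with an all-true build_vector is the vector-wide NOT of the mask. A packed-bits model of the <4 x s1> select, again purely illustrative:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t a = 0b1010, b = 0b0110;             // two <4 x s1> values, one lane per bit
  const uint8_t masks[] = {0b1111, 0b0000};   // splatted s1 true / false
  for (uint8_t m : masks) {
    uint8_t not_m = m ^ 0b1111;               // the G_XOR with the all-true vector
    uint8_t sel = (a & m) | (b & not_m);      // the G_AND/G_AND/G_OR sequence
    assert(sel == (m ? a : b));
  }
  return 0;
}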