Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7261,32 +7261,32 @@
   Register Op2Reg = MI.getOperand(3).getReg();
   LLT DstTy = MRI.getType(DstReg);
   LLT MaskTy = MRI.getType(MaskReg);
-  LLT Op1Ty = MRI.getType(Op1Reg);

   if (!DstTy.isVector())
     return UnableToLegalize;

-  // Vector selects can have a scalar predicate. If so, splat into a vector and
-  // finish for later legalization attempts to try again.
   if (MaskTy.isScalar()) {
-    // FIXME: We shouldn't be promoting the mask type here, or even the
-    // broadcast. The broadcast should probably be handled as MoreElements, not
-    // lower.
+    // Turn the scalar condition into a vector condition mask.
+
     Register MaskElt = MaskReg;
-    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) {
-      // FIXME: We have no way of knowing if this is FP without the original
-      // boolean's context.
-      MaskElt = MIRBuilder.buildBoolExt(DstTy.getElementType(),
-                                        MaskElt, false).getReg(0);
+
+    // The condition was potentially zero extended before, but we want a sign
+    // extended boolean.
+    if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() &&
+        MaskTy != LLT::scalar(1)) {
+      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
     }
-    // Generate a vector splat idiom to be pattern matched later.
+
+    // Continue the sign extension (or truncate) to match the data type.
+    MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
+                                          MaskElt).getReg(0);
+
+    // Generate a vector splat idiom.
     auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
-    Observer.changingInstr(MI);
-    MI.getOperand(1).setReg(ShufSplat.getReg(0));
-    Observer.changedInstr(MI);
-    return Legalized;
+    MaskReg = ShufSplat.getReg(0);
+    MaskTy = DstTy;
   }

-  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
+  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
     return UnableToLegalize;
   }
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -185,3 +185,150 @@
     %trunc:_(s64) = G_TRUNC %select
     $x0 = COPY %trunc
     RET_ReallyLR implicit $x0
+...
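For reference while reading the new tests below: the lowered form they all check is the mask-splat select idiom, where the boolean becomes an all-ones or all-zeros element, is splatted, and the select turns into (a & mask) | (b & ~mask). A minimal one-lane model in plain C++, illustrative only; the names here are ours, not LLVM API:

#include <cassert>
#include <cstdint>

// One lane of the lowered G_SELECT; mask is 0 or -1 per lane.
static int32_t select_lane(bool cond, int32_t a, int32_t b) {
  int32_t mask = cond ? -1 : 0;    // what G_SEXT_INREG / G_SEXT produce
  return (a & mask) | (b & ~mask); // the G_AND / G_XOR / G_OR sequence
}

int main() {
  assert(select_lane(true, 7, 9) == 7);
  assert(select_lane(false, 7, 9) == 9);
  return 0;
}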
+
+# The select condition has already been zero extended to s32 and
+# needs a sext_inreg to get a vector boolean.
+---
+name: scalar_mask_already_promoted_select_s32_v4s32
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask_already_promoted_select_s32_v4s32
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %3:_(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<4 x s32>) = G_SELECT %3(s32), %1, %5
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
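Why G_SEXT_INREG with width 1 is the right fixup in the test above: after promotion the condition register holds a zero-extended 0 or 1, and sign extending from bit 0 yields the 0 or -1 lane mask the and/or idiom needs. A hypothetical standalone model in C++; the helper name is ours:

#include <cassert>
#include <cstdint>

// Equivalent of G_SEXT_INREG %x, 1 on an s32: replicate bit 0 into all bits.
static int32_t sext_inreg1_s32(int32_t x) {
  return -(x & 1); // bit 0 set -> 0xffffffff, clear -> 0
}

int main() {
  assert(sext_inreg1_s32(1) == -1);
  assert(sext_inreg1_s32(0) == 0);
  assert(sext_inreg1_s32(2) == 0); // only bit 0 matters
  return 0;
}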
+
+# The scalar select condition was zero extended to s32, a width that
+# differs from the vector element type. It needs a sext_inreg, then a
+# sign extension to the full element width.
+---
+name: scalar_mask_select_s32_v2s64
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask_select_s32_v2s64
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: $q0 = COPY [[OR]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<2 x s64>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s64) = G_FCONSTANT double 0.000000e+00
+    %5:_(<2 x s64>) = G_BUILD_VECTOR %6(s64), %6(s64)
+    %3:_(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<2 x s64>) = G_SELECT %3(s32), %1, %5
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
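Together the tests cover each path through the new mask handling: a same-width mask needs only the sext_inreg, a narrower mask additionally needs the widening G_SEXT, and the degenerate s1 mask in the test below needs neither. A sketch of that decision, mirroring the hunk above with plain integers standing in for LLT sizes (an illustration, not the LLVM API):

#include <cassert>

struct MaskOps {
  bool SExtInReg;   // emit G_SEXT_INREG %mask, 1
  bool SExtOrTrunc; // widen/narrow to the element size; equal sizes fold to a COPY
};

static MaskOps maskOpsFor(unsigned MaskBits, unsigned EltBits) {
  MaskOps Ops;
  Ops.SExtInReg = MaskBits <= EltBits && MaskBits != 1;
  Ops.SExtOrTrunc = MaskBits != EltBits;
  return Ops;
}

int main() {
  assert(maskOpsFor(32, 32).SExtInReg && !maskOpsFor(32, 32).SExtOrTrunc);
  assert(maskOpsFor(32, 64).SExtInReg && maskOpsFor(32, 64).SExtOrTrunc);
  assert(!maskOpsFor(1, 1).SExtInReg && !maskOpsFor(1, 1).SExtOrTrunc);
  return 0;
}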
+
+# Check the degenerate case where the selected element size is the same
+# as the condition bitwidth.
+---
+name: select_v4s1_s1
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1, $q2, $w0
+
+    ; CHECK-LABEL: name: select_v4s1_s1
+    ; CHECK: liveins: $q0, $q1, $q2, $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %w0:_(s32) = COPY $w0
+    ; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: %q1:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: %q2:_(<4 x s32>) = COPY $q2
+    ; CHECK-NEXT: %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
+    ; CHECK-NEXT: %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %w0(s32), [[C]]
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s1) = COPY %cmp(s1)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s1), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s1>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s1>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s1>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s1>) = G_AND %vec_cond0, [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s1>) = G_AND %vec_cond1, [[XOR]]
+    ; CHECK-NEXT: %select:_(<4 x s1>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>)
+    ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %w0:_(s32) = COPY $w0
+    %q0:_(<4 x s32>) = COPY $q0
+    %q1:_(<4 x s32>) = COPY $q1
+    %q2:_(<4 x s32>) = COPY $q2
+    %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0, %q1
+    %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0, %q2
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %cmp:_(s1) = G_ICMP intpred(eq), %w0, %2
+    %select:_(<4 x s1>) = G_SELECT %cmp, %vec_cond0, %vec_cond1
+    %zext_select:_(<4 x s32>) = G_ZEXT %select
+    $q0 = COPY %zext_select
+    RET_ReallyLR implicit $q0
+
+...
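The same and/xor/or idiom holds at s1 granularity in the last test, where the xor with an all-true build_vector is the vector-wide NOT of the mask. A packed-bits model of the <4 x s1> select, again purely illustrative:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t a = 0b1010, b = 0b0110;             // two <4 x s1> values, one lane per bit
  const uint8_t masks[] = {0b1111, 0b0000};   // splatted s1 true / false
  for (uint8_t m : masks) {
    uint8_t not_m = m ^ 0b1111;               // the G_XOR with the all-true vector
    uint8_t sel = (a & m) | (b & not_m);      // the G_AND/G_AND/G_OR sequence
    assert(sel == (m ? a : b));
  }
  return 0;
}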