Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -861,6 +861,7 @@
   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                               unsigned NewOp) const;
+  SDValue LowerToPredicatedSelect(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1084,6 +1084,7 @@
   setOperationAction(ISD::LOAD, VT, Custom);
   setOperationAction(ISD::STORE, VT, Custom);
   setOperationAction(ISD::TRUNCATE, VT, Custom);
+  setOperationAction(ISD::VSELECT, VT, Custom);
 }
 
 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
@@ -3460,6 +3461,8 @@
     return LowerSETCC(Op, DAG);
   case ISD::BR_CC:
     return LowerBR_CC(Op, DAG);
+  case ISD::VSELECT:
+    return LowerToPredicatedSelect(Op, DAG);
   case ISD::SELECT:
     return LowerSELECT(Op, DAG);
   case ISD::SELECT_CC:
@@ -15020,6 +15023,31 @@
   }
 }
 
+static MVT getContainerForFixedLengthMask(EVT VT) {
+  MVT MaskVT;
+  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+  default:
+    llvm_unreachable("unexpected element type for SVE predicate");
+  case MVT::i8:
+    MaskVT = MVT::nxv16i1;
+    break;
+  case MVT::i16:
+  case MVT::f16:
+    MaskVT = MVT::nxv8i1;
+    break;
+  case MVT::i32:
+  case MVT::f32:
+    MaskVT = MVT::nxv4i1;
+    break;
+  case MVT::i64:
+  case MVT::f64:
+    MaskVT = MVT::nxv2i1;
+    break;
+  }
+
+  return MaskVT;
+}
+
 // Return a PTRUE with active lanes corresponding to the extent of VT.
 static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
                                                 EVT VT) {
@@ -15064,27 +15092,7 @@
   // use AArch64SVEPredPattern::all, which can enable the use of unpredicated
   // variants of instructions when available.
 
-  MVT MaskVT;
-  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
-  default:
-    llvm_unreachable("unexpected element type for SVE predicate");
-  case MVT::i8:
-    MaskVT = MVT::nxv16i1;
-    break;
-  case MVT::i16:
-  case MVT::f16:
-    MaskVT = MVT::nxv8i1;
-    break;
-  case MVT::i32:
-  case MVT::f32:
-    MaskVT = MVT::nxv4i1;
-    break;
-  case MVT::i64:
-  case MVT::f64:
-    MaskVT = MVT::nxv2i1;
-    break;
-  }
-
+  MVT MaskVT = getContainerForFixedLengthMask(VT);
   return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
                      DAG.getTargetConstant(PgPattern, DL, MVT::i64));
 }
@@ -15104,6 +15112,25 @@
   return getPredicateForScalableVector(DAG, DL, VT);
 }
 
+static SDValue convertToScalableMask(SelectionDAG &DAG, EVT VT, SDValue V) {
+  assert(VT.isScalableVector() &&
+         "Expected to convert into a scalable vector!");
+  assert(V.getValueType().isFixedLengthVector() &&
+         "Expected a fixed length vector operand!");
+  SDLoc DL(V);
+  EVT ContainerVT = getContainerForFixedLengthVector(DAG, V.getValueType());
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+
+  // FIXME: Fixed width vXi1 are not legal types. Insert the zero extended
+  // mask into a scalable vector, and then truncate to nxvXi1. Is there
+  // a better sequence to do this??
+  SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+                            DAG.getUNDEF(ContainerVT), V, Zero);
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Ins);
+
+  return Trunc;
+}
+
 // Grow V to consume an entire SVE register.
 static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
   assert(VT.isScalableVector() &&
@@ -15237,3 +15264,31 @@
 
   return DAG.getNode(NewOp, DL, VT, Operands);
 }
+
+SDValue
+AArch64TargetLowering::LowerToPredicatedSelect(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+
+  if (useSVEForFixedLengthVectorVT(VT)) {
+    auto Pg = getPredicateForVector(DAG, DL, VT);
+    auto Mask = Op->getOperand(0);
+    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+    MVT MaskTy = getContainerForFixedLengthMask(ContainerVT);
+
+    // Combine the select condition with the fixed VL mask.
+    SDValue CombinedMask = DAG.getNode(ISD::AND, DL, MaskTy, Pg,
+                                       convertToScalableMask(DAG, MaskTy, Mask));
+    SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
+    SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
+
+    auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
+                                   CombinedMask, Op1, Op2);
+
+    return convertFromScalableVector(DAG, VT, ScalableRes);
+  }
+
+  assert(VT.isScalableVector() && "Only expect to lower scalable vector select!");
+  return SDValue();
+}
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -0,0 +1,176 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: ptrue
+
+; Don't use SVE for 64-bit vectors.
+define void @select_v4f16(<4 x half>* %a, <4 x half>* %b, <4 x i1>* %c) #0 {
+  %mask = load <4 x i1>, <4 x i1>* %c
+  %op1 = load <4 x half>, <4 x half>* %a
+  %op2 = load <4 x half>, <4 x half>* %b
+  %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
+  store <4 x half> %sel, <4 x half>* %a
+  ret void
+}
+
+; Don't use SVE for 128-bit vectors.
+define void @select_v8f16(<8 x half>* %a, <8 x half>* %b, <8 x i1>* %c) #0 {
+  %mask = load <8 x i1>, <8 x i1>* %c
+  %op1 = load <8 x half>, <8 x half>* %a
+  %op2 = load <8 x half>, <8 x half>* %b
+  %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
+  store <8 x half> %sel, <8 x half>* %a
+  ret void
+}
+
+define void @select_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i1>* %c) #0 {
+  %mask = load <16 x i1>, <16 x i1>* %c
+  %op1 = load <16 x half>, <16 x half>* %a
+  %op2 = load <16 x half>, <16 x half>* %b
+  %sel = select <16 x i1> %mask, <16 x half> %op1, <16 x half> %op2
+  store <16 x half> %sel, <16 x half>* %a
+  ret void
+}
+
+define void @select_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i1>* %c) #0 {
+  %mask = load <32 x i1>, <32 x i1>* %c
+  %op1 = load <32 x half>, <32 x half>* %a
+  %op2 = load <32 x half>, <32 x half>* %b
+  %sel = select <32 x i1> %mask, <32 x half> %op1, <32 x half> %op2
+  store <32 x half> %sel, <32 x half>* %a
+  ret void
+}
+
+define void @select_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i1>* %c) #0 {
+  %mask = load <64 x i1>, <64 x i1>* %c
+  %op1 = load <64 x half>, <64 x half>* %a
+  %op2 = load <64 x half>, <64 x half>* %b
+  %sel = select <64 x i1> %mask, <64 x half> %op1, <64 x half> %op2
+  store <64 x half> %sel, <64 x half>* %a
+  ret void
+}
+
+define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i1>* %c) #0 {
+  %mask = load <128 x i1>, <128 x i1>* %c
+  %op1 = load <128 x half>, <128 x half>* %a
+  %op2 = load <128 x half>, <128 x half>* %b
+  %sel = select <128 x i1> %mask, <128 x half> %op1, <128 x half> %op2
+  store <128 x half> %sel, <128 x half>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define void @select_v2f32(<2 x float>* %a, <2 x float>* %b, <2 x i1>* %c) #0 {
+  %mask = load <2 x i1>, <2 x i1>* %c
+  %op1 = load <2 x float>, <2 x float>* %a
+  %op2 = load <2 x float>, <2 x float>* %b
+  %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
+  store <2 x float> %sel, <2 x float>* %a
+  ret void
+}
+
+; Don't use SVE for 128-bit vectors.
+define void @select_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x i1>* %c) #0 {
+  %mask = load <4 x i1>, <4 x i1>* %c
+  %op1 = load <4 x float>, <4 x float>* %a
+  %op2 = load <4 x float>, <4 x float>* %b
+  %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
+  store <4 x float> %sel, <4 x float>* %a
+  ret void
+}
+
+define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i1>* %c) #0 {
+  %mask = load <8 x i1>, <8 x i1>* %c
+  %op1 = load <8 x float>, <8 x float>* %a
+  %op2 = load <8 x float>, <8 x float>* %b
+  %sel = select <8 x i1> %mask, <8 x float> %op1, <8 x float> %op2
+  store <8 x float> %sel, <8 x float>* %a
+  ret void
+}
+
+define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i1>* %c) #0 {
+  %mask = load <16 x i1>, <16 x i1>* %c
+  %op1 = load <16 x float>, <16 x float>* %a
+  %op2 = load <16 x float>, <16 x float>* %b
+  %sel = select <16 x i1> %mask, <16 x float> %op1, <16 x float> %op2
+  store <16 x float> %sel, <16 x float>* %a
+  ret void
+}
+
+define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i1>* %c) #0 {
+  %mask = load <32 x i1>, <32 x i1>* %c
+  %op1 = load <32 x float>, <32 x float>* %a
+  %op2 = load <32 x float>, <32 x float>* %b
+  %sel = select <32 x i1> %mask, <32 x float> %op1, <32 x float> %op2
+  store <32 x float> %sel, <32 x float>* %a
+  ret void
+}
+
+define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i1>* %c) #0 {
+  %mask = load <64 x i1>, <64 x i1>* %c
+  %op1 = load <64 x float>, <64 x float>* %a
+  %op2 = load <64 x float>, <64 x float>* %b
+  %sel = select <64 x i1> %mask, <64 x float> %op1, <64 x float> %op2
+  store <64 x float> %sel, <64 x float>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define void @select_v1f64(<1 x double>* %a, <1 x double>* %b, <1 x i1>* %c) #0 {
+  %mask = load <1 x i1>, <1 x i1>* %c
+  %op1 = load <1 x double>, <1 x double>* %a
+  %op2 = load <1 x double>, <1 x double>* %b
+  %sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
+  store <1 x double> %sel, <1 x double>* %a
+  ret void
+}
+
+; Don't use SVE for 128-bit vectors.
+define void @select_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x i1>* %c) #0 {
+  %mask = load <2 x i1>, <2 x i1>* %c
+  %op1 = load <2 x double>, <2 x double>* %a
+  %op2 = load <2 x double>, <2 x double>* %b
+  %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2
+  store <2 x double> %sel, <2 x double>* %a
+  ret void
+}
+
+define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i1>* %c) #0 {
+  %mask = load <4 x i1>, <4 x i1>* %c
+  %op1 = load <4 x double>, <4 x double>* %a
+  %op2 = load <4 x double>, <4 x double>* %b
+  %sel = select <4 x i1> %mask, <4 x double> %op1, <4 x double> %op2
+  store <4 x double> %sel, <4 x double>* %a
+  ret void
+}
+
+define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i1>* %c) #0 {
+  %mask = load <8 x i1>, <8 x i1>* %c
+  %op1 = load <8 x double>, <8 x double>* %a
+  %op2 = load <8 x double>, <8 x double>* %b
+  %sel = select <8 x i1> %mask, <8 x double> %op1, <8 x double> %op2
+  store <8 x double> %sel, <8 x double>* %a
+  ret void
+}
+
+define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i1>* %c) #0 {
+  %mask = load <16 x i1>, <16 x i1>* %c
+  %op1 = load <16 x double>, <16 x double>* %a
+  %op2 = load <16 x double>, <16 x double>* %b
+  %sel = select <16 x i1> %mask, <16 x double> %op1, <16 x double> %op2
+  store <16 x double> %sel, <16 x double>* %a
+  ret void
+}
+
+define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i1>* %c) #0 {
+  %mask = load <32 x i1>, <32 x i1>* %c
+  %op1 = load <32 x double>, <32 x double>* %a
+  %op2 = load <32 x double>, <32 x double>* %b
+  %sel = select <32 x i1> %mask, <32 x double> %op1, <32 x double> %op2
+  store <32 x double> %sel, <32 x double>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -0,0 +1,255 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: ptrue
+
+; Don't use SVE for 64-bit vectors.
+define void @select_v8i8(<8 x i8>* %a, <8 x i8>* %b, <8 x i1>* %c) #0 {
+;
+  %mask = load <8 x i1>, <8 x i1>* %c
+  %op1 = load <8 x i8>, <8 x i8>* %a
+  %op2 = load <8 x i8>, <8 x i8>* %b
+  %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
+  store <8 x i8> %sel, <8 x i8>* %a
+  ret void
+}
+
+define void @select_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i1>* %c) #0 {
+;
+  %mask = load <16 x i1>, <16 x i1>* %c
+  %op1 = load <16 x i8>, <16 x i8>* %a
+  %op2 = load <16 x i8>, <16 x i8>* %b
+  %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
+  store <16 x i8> %sel, <16 x i8>* %a
+  ret void
+}
+
+define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b, <32 x i1>* %c) #0 {
+;
+  %mask = load <32 x i1>, <32 x i1>* %c
+  %op1 = load <32 x i8>, <32 x i8>* %a
+  %op2 = load <32 x i8>, <32 x i8>* %b
+  %sel = select <32 x i1> %mask, <32 x i8> %op1, <32 x i8> %op2
+  store <32 x i8> %sel, <32 x i8>* %a
+  ret void
+}
+
+define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b, <64 x i1>* %c) #0 {
+;
+  %mask = load <64 x i1>, <64 x i1>* %c
+  %op1 = load <64 x i8>, <64 x i8>* %a
+  %op2 = load <64 x i8>, <64 x i8>* %b
+  %sel = select <64 x i1> %mask, <64 x i8> %op1, <64 x i8> %op2
+  store <64 x i8> %sel, <64 x i8>* %a
+  ret void
+}
+
+define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b, <128 x i1>* %c) #0 {
+;
+  %mask = load <128 x i1>, <128 x i1>* %c
+  %op1 = load <128 x i8>, <128 x i8>* %a
+  %op2 = load <128 x i8>, <128 x i8>* %b
+  %sel = select <128 x i1> %mask, <128 x i8> %op1, <128 x i8> %op2
+  store <128 x i8> %sel, <128 x i8>* %a
+  ret void
+}
+
+define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b, <256 x i1>* %c) #0 {
+;
+  %mask = load <256 x i1>, <256 x i1>* %c
+  %op1 = load <256 x i8>, <256 x i8>* %a
+  %op2 = load <256 x i8>, <256 x i8>* %b
+  %sel = select <256 x i1> %mask, <256 x i8> %op1, <256 x i8> %op2
+  store <256 x i8> %sel, <256 x i8>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define void @select_v4i16(<4 x i16>* %a, <4 x i16>* %b, <4 x i1>* %c) #0 {
+;
+  %mask = load <4 x i1>, <4 x i1>* %c
+  %op1 = load <4 x i16>, <4 x i16>* %a
+  %op2 = load <4 x i16>, <4 x i16>* %b
+  %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
+  store <4 x i16> %sel, <4 x i16>* %a
+  ret void
+}
+
+; Don't use SVE for 128-bit vectors.
+define void @select_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i1>* %c) #0 {
+;
+  %mask = load <8 x i1>, <8 x i1>* %c
+  %op1 = load <8 x i16>, <8 x i16>* %a
+  %op2 = load <8 x i16>, <8 x i16>* %b
+  %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
+  store <8 x i16> %sel, <8 x i16>* %a
+  ret void
+}
+
+define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b, <16 x i1>* %c) #0 {
+;
+  %mask = load <16 x i1>, <16 x i1>* %c
+  %op1 = load <16 x i16>, <16 x i16>* %a
+  %op2 = load <16 x i16>, <16 x i16>* %b
+  %sel = select <16 x i1> %mask, <16 x i16> %op1, <16 x i16> %op2
+  store <16 x i16> %sel, <16 x i16>* %a
+  ret void
+}
+
+define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b, <32 x i1>* %c) #0 {
+;
+  %mask = load <32 x i1>, <32 x i1>* %c
+  %op1 = load <32 x i16>, <32 x i16>* %a
+  %op2 = load <32 x i16>, <32 x i16>* %b
+  %sel = select <32 x i1> %mask, <32 x i16> %op1, <32 x i16> %op2
+  store <32 x i16> %sel, <32 x i16>* %a
+  ret void
+}
+
+define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b, <64 x i1>* %c) #0 {
+;
+  %mask = load <64 x i1>, <64 x i1>* %c
+  %op1 = load <64 x i16>, <64 x i16>* %a
+  %op2 = load <64 x i16>, <64 x i16>* %b
+  %sel = select <64 x i1> %mask, <64 x i16> %op1, <64 x i16> %op2
+  store <64 x i16> %sel, <64 x i16>* %a
+  ret void
+}
+
+define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b, <128 x i1>* %c) #0 {
+;
+  %mask = load <128 x i1>, <128 x i1>* %c
+  %op1 = load <128 x i16>, <128 x i16>* %a
+  %op2 = load <128 x i16>, <128 x i16>* %b
+  %sel = select <128 x i1> %mask, <128 x i16> %op1, <128 x i16> %op2
+  store <128 x i16> %sel, <128 x i16>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define void @select_v2i32(<2 x i32>* %a, <2 x i32>* %b, <2 x i1>* %c) #0 {
+;
+  %mask = load <2 x i1>, <2 x i1>* %c
+  %op1 = load <2 x i32>, <2 x i32>* %a
+  %op2 = load <2 x i32>, <2 x i32>* %b
+  %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
+  store <2 x i32> %sel, <2 x i32>* %a
+  ret void
+}
+
+; Don't use SVE for 128-bit vectors.
+define void @select_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i1>* %c) #0 {
+;
+  %mask = load <4 x i1>, <4 x i1>* %c
+  %op1 = load <4 x i32>, <4 x i32>* %a
+  %op2 = load <4 x i32>, <4 x i32>* %b
+  %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
+  store <4 x i32> %sel, <4 x i32>* %a
+  ret void
+}
+
+define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, <8 x i1>* %c) #0 {
+;
+  %mask = load <8 x i1>, <8 x i1>* %c
+  %op1 = load <8 x i32>, <8 x i32>* %a
+  %op2 = load <8 x i32>, <8 x i32>* %b
+  %sel = select <8 x i1> %mask, <8 x i32> %op1, <8 x i32> %op2
+  store <8 x i32> %sel, <8 x i32>* %a
+  ret void
+}
+
+define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, <16 x i1>* %c) #0 {
+;
+  %mask = load <16 x i1>, <16 x i1>* %c
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %sel = select <16 x i1> %mask, <16 x i32> %op1, <16 x i32> %op2
+  store <16 x i32> %sel, <16 x i32>* %a
+  ret void
+}
+
+define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, <32 x i1>* %c) #0 {
+;
+  %mask = load <32 x i1>, <32 x i1>* %c
+  %op1 = load <32 x i32>, <32 x i32>* %a
+  %op2 = load <32 x i32>, <32 x i32>* %b
+  %sel = select <32 x i1> %mask, <32 x i32> %op1, <32 x i32> %op2
+  store <32 x i32> %sel, <32 x i32>* %a
+  ret void
+}
+
+define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, <64 x i1>* %c) #0 {
+;
+  %mask = load <64 x i1>, <64 x i1>* %c
+  %op1 = load <64 x i32>, <64 x i32>* %a
+  %op2 = load <64 x i32>, <64 x i32>* %b
+  %sel = select <64 x i1> %mask, <64 x i32> %op1, <64 x i32> %op2
+  store <64 x i32> %sel, <64 x i32>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define void @select_v1i64(<1 x i64>* %a, <1 x i64>* %b, <1 x i1>* %c) #0 {
+;
+  %mask = load <1 x i1>, <1 x i1>* %c
+  %op1 = load <1 x i64>, <1 x i64>* %a
+  %op2 = load <1 x i64>, <1 x i64>* %b
+  %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
+  store <1 x i64> %sel, <1 x i64>* %a
+  ret void
+}
+
+; Don't use SVE for 128-bit vectors.
+define void @select_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i1>* %c) #0 {
+;
+  %mask = load <2 x i1>, <2 x i1>* %c
+  %op1 = load <2 x i64>, <2 x i64>* %a
+  %op2 = load <2 x i64>, <2 x i64>* %b
+  %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
+  store <2 x i64> %sel, <2 x i64>* %a
+  ret void
+}
+
+define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, <4 x i1>* %c) #0 {
+;
+  %mask = load <4 x i1>, <4 x i1>* %c
+  %op1 = load <4 x i64>, <4 x i64>* %a
+  %op2 = load <4 x i64>, <4 x i64>* %b
+  %sel = select <4 x i1> %mask, <4 x i64> %op1, <4 x i64> %op2
+  store <4 x i64> %sel, <4 x i64>* %a
+  ret void
+}
+
+define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, <8 x i1>* %c) #0 {
+;
+  %mask = load <8 x i1>, <8 x i1>* %c
+  %op1 = load <8 x i64>, <8 x i64>* %a
+  %op2 = load <8 x i64>, <8 x i64>* %b
+  %sel = select <8 x i1> %mask, <8 x i64> %op1, <8 x i64> %op2
+  store <8 x i64> %sel, <8 x i64>* %a
+  ret void
+}
+
+define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, <16 x i1>* %c) #0 {
+;
+  %mask = load <16 x i1>, <16 x i1>* %c
+  %op1 = load <16 x i64>, <16 x i64>* %a
+  %op2 = load <16 x i64>, <16 x i64>* %b
+  %sel = select <16 x i1> %mask, <16 x i64> %op1, <16 x i64> %op2
+  store <16 x i64> %sel, <16 x i64>* %a
+  ret void
+}
+
+define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b, <32 x i1>* %c) #0 {
+;
+  %mask = load <32 x i1>, <32 x i1>* %c
+  %op1 = load <32 x i64>, <32 x i64>* %a
+  %op2 = load <32 x i64>, <32 x i64>* %b
+  %sel = select <32 x i1> %mask, <32 x i64> %op1, <32 x i64> %op2
+  store <32 x i64> %sel, <32 x i64>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }