diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -17634,6 +17634,76 @@
 ``TargetTransformInfo::hasActiveVectorLength()`` returns true when the target
 has native support for %evl.
 
+.. _int_vp_select:
+
+'``llvm.vp.select.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.select.v16i32 (<16 x i1> <mask>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <pivot>)
+      declare <vscale x 4 x i64>  @llvm.vp.select.nxv4i64 (<vscale x 4 x i1> <mask>, <vscale x 4 x i32> <on_true>, <vscale x 4 x i32> <on_false>, i32 <pivot>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.select``' intrinsic is used to choose one value based on a
+condition, without IR-level branching.
+
+Arguments:
+""""""""""
+
+The first operand is a vector of `i1` and indicates the mask.  The second
+operand is the value that is selected when the condition is true.  The third
+operand is the value that is selected when the condition is false.  The fourth
+operand is the pivot.
+
+#. The optional ``fast-math flags`` marker indicates that the select has one or more
+   :ref:`fast-math flags <fastmath>`. These are optimization hints to enable
+   otherwise unsafe floating-point optimizations. Fast-math flags are only valid
+   for selects that return a floating-point scalar or vector type, or an array
+   (nested to any depth) of floating-point scalar or vector types.
+
+Semantics:
+""""""""""
+
+The intrinsic selects lanes from the second and third operand depending on a
+condition.  The pivot creates a mask, %pivot, with all elements ``0 <= i <
+%pivotMask`` set to ``1`` and all others set to ``0``.
+
+::
+
+      M = %mask AND %pivotMask
+
+If the condition is an i1 and it evaluates to 1, the instruction returns
+the first value argument; otherwise, it returns the second value
+argument.
+
+If the condition is a vector of i1, then the value arguments must be
+vectors of the same size, and the selection is done element by element.
+
+If the condition is an i1 and the value arguments are vectors of the
+same size, then an entire vector is selected.
+
+Note that the intrinsic does not have an ``%evl`` parameter.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %m, <4 x i32> %a, <4 x i32> %b, i32 %pivot)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %pivotMask = icmp lgt i32 <0, 1, 2, 3> %lane_idx
+      %M = and <4 x i1> %mask, %pivotMask
+      %also.r = select <4 x i1> %M, <4 x i32> %on_true, <4 x i32> %on_false
+
+
 
 .. _int_vp_add:
 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1497,6 +1497,12 @@
                                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                 llvm_i32_ty]>;
 }
+// Shuffles.
+def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                              [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                LLVMMatchType<0>,
+                                LLVMMatchType<0>,
+                                llvm_i32_ty]>;
 
 def int_get_active_lane_mask:
   DefaultAttrsIntrinsic<[llvm_anyvector_ty],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -231,6 +231,15 @@
 
 ///// } Memory Operations
 
+///// Shuffles {
+
+// llvm.vp.select(mask,on_true,on_false,pivot)
+BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, None)
+// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4)
+// END_REGISTER_CASES(vp_select, VP_SELECT)
+END_REGISTER_VP_INTRINSIC(vp_select)
+
+///// } Shuffles
 
 #undef BEGIN_REGISTER_VP
 #undef BEGIN_REGISTER_VP_INTRINSIC
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -476,6 +476,11 @@
   default:
     VPFunc = Intrinsic::getDeclaration(M, VPID, Params[0]->getType());
     break;
+  case Intrinsic::vp_select:
+    VPFunc = Intrinsic::getDeclaration(
+        M, VPID,
+        {Params[1]->getType()});
+    break;
   case Intrinsic::vp_load:
     VPFunc = Intrinsic::getDeclaration(
         M, VPID,
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -46,10 +46,11 @@
       Str << " declare <8 x float> @llvm.vp." << BinaryFPOpcode
           << ".v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) ";
 
-    Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) ";
-        Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, <8 x i1>, i32) ";
-        Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32) ";
-        Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32) ";
+    Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) "
+        << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, <8 x i1>, i32) "
+        << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32) "
+        << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32) "
+        << " declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)";
 
     return parseAssemblyString(Str.str(), Err, C);
   }