diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -173,20 +173,13 @@ // This builtin has a granted vector length parameter. bit HasVL = true; - // There are several cases for specifying tail policy. - // 1. Add tail policy argument to masked intrinsics. It may have the maskedoff - // argument or not. - // * Have the maskedoff argument: (HasPolicy = true, HasMaskedOffOperand = true) - // Ex: vadd_vv_i8m1_mt(mask, maskedoff, op1, op2, vl, ta); - // * Do not have the maskedoff argument: (HasPolicy = true, HasMaskedOffOperand = false) - // Ex: vmacc_vv_i8m1_mt(mask, vd, vs1, vs2, vl, ta); - // 2. Add dest argument for no mask intrinsics. (TODO) - // Ex: vmv_v_x_i8m1_t(dest, src, vl); - // 3. Always tail agnostic. (HasPolicy = false) - // Ex: vmseq_vv_i8m1_b8_m(mask, maskedoff, op1, op2, vl); - // The tail policy argument is located at the last position. + // The masked intrinsic IR have the policy operand. + // The policy argument is located at the last position. bit HasPolicy = true; + // The nomask intrinsic IR have the passthru operand. + bit HasNoMaskPassThru = false; + // This builtin supports non-masked function overloading api. // All masked operations support overloading api. bit HasNoMaskedOverloaded = true; @@ -1290,6 +1283,8 @@ cast(ResultType)->getElementType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[1])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1321,6 +1316,8 @@ Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getAllOnesValue(IntrinsicTypes[1])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1369,6 +1366,8 @@ IntrinsicTypes = {ResultType, Ops[0]->getType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, Ops[0]); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1402,6 +1401,8 @@ cast(Ops[0]->getType())->getElementType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[2])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1438,6 +1439,8 @@ Ops[1]->getType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[2])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1583,15 +1586,18 @@ // 12. Vector Integer Arithmetic Instructions // 12.1. Vector Single-Width Integer Add and Subtract +let HasNoMaskPassThru = true in { defm vadd : RVVIntBinBuiltinSet; defm vsub : RVVIntBinBuiltinSet; defm vrsub : RVVOutOp1BuiltinSet<"vrsub", "csil", [["vx", "v", "vve"], ["vx", "Uv", "UvUvUe"]]>; +} defm vneg_v : RVVPseudoUnaryBuiltin<"vrsub", "csil">; // 12.2. 
Vector Widening Integer Add/Subtract // Widening unsigned integer add/subtract, 2*SEW = SEW +/- SEW +let HasNoMaskPassThru = true in { defm vwaddu : RVVUnsignedWidenBinBuiltinSet; defm vwsubu : RVVUnsignedWidenBinBuiltinSet; // Widening signed integer add/subtract, 2*SEW = SEW +/- SEW @@ -1603,6 +1609,7 @@ // Widening signed integer add/subtract, 2*SEW = 2*SEW +/- SEW defm vwadd : RVVSignedWidenOp0BinBuiltinSet; defm vwsub : RVVSignedWidenOp0BinBuiltinSet; +} defm vwcvtu_x_x_v : RVVPseudoVWCVTBuiltin<"vwaddu", "vwcvtu_x", "csi", [["Uw", "UwUv"]]>; defm vwcvt_x_x_v : RVVPseudoVWCVTBuiltin<"vwadd", "vwcvt_x", "csi", @@ -1633,12 +1640,15 @@ } // 12.5. Vector Bitwise Logical Instructions +let HasNoMaskPassThru = true in { defm vand : RVVIntBinBuiltinSet; defm vxor : RVVIntBinBuiltinSet; defm vor : RVVIntBinBuiltinSet; +} defm vnot_v : RVVPseudoVNotBuiltin<"vxor", "csil">; // 12.6. Vector Single-Width Bit Shift Instructions +let HasNoMaskPassThru = true in { defm vsll : RVVShiftBuiltinSet; defm vsrl : RVVUnsignedShiftBuiltinSet; defm vsra : RVVSignedShiftBuiltinSet; @@ -1646,6 +1656,7 @@ // 12.7. Vector Narrowing Integer Right Shift Instructions defm vnsrl : RVVUnsignedNShiftBuiltinSet; defm vnsra : RVVSignedNShiftBuiltinSet; +} defm vncvt_x_x_w : RVVPseudoVNCVTBuiltin<"vnsrl", "vncvt_x", "csi", [["v", "vw"], ["Uv", "UvUw"]]>; @@ -1665,6 +1676,7 @@ } // 12.9. Vector Integer Min/Max Instructions +let HasNoMaskPassThru = true in { defm vminu : RVVUnsignedBinBuiltinSet; defm vmin : RVVSignedBinBuiltinSet; defm vmaxu : RVVUnsignedBinBuiltinSet; @@ -1685,9 +1697,10 @@ defm vdiv : RVVSignedBinBuiltinSet; defm vremu : RVVUnsignedBinBuiltinSet; defm vrem : RVVSignedBinBuiltinSet; +} // 12.12. Vector Widening Integer Multiply Instructions -let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { +let Log2LMUL = [-3, -2, -1, 0, 1, 2], HasNoMaskPassThru = true in { defm vwmul : RVVOutOp0Op1BuiltinSet<"vwmul", "csi", [["vv", "w", "wvv"], ["vx", "w", "wve"]]>; @@ -1751,6 +1764,7 @@ // 13. Vector Fixed-Point Arithmetic Instructions // 13.1. Vector Single-Width Saturating Add and Subtract +let HasNoMaskPassThru = true in { defm vsaddu : RVVUnsignedBinBuiltinSet; defm vsadd : RVVSignedBinBuiltinSet; defm vssubu : RVVUnsignedBinBuiltinSet; @@ -1800,6 +1814,7 @@ [["vv", "w", "wvv"], ["vf", "w", "wve"]]>; } +} // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions defm vfmacc : RVVFloatingTerBuiltinSet; @@ -1827,6 +1842,7 @@ def vfrec7 : RVVFloatingUnaryVVBuiltin; // 14.11. Vector Floating-Point MIN/MAX Instructions +let HasNoMaskPassThru = true in { defm vfmin : RVVFloatingBinBuiltinSet; defm vfmax : RVVFloatingBinBuiltinSet; @@ -1834,6 +1850,7 @@ defm vfsgnj : RVVFloatingBinBuiltinSet; defm vfsgnjn : RVVFloatingBinBuiltinSet; defm vfsgnjx : RVVFloatingBinBuiltinSet; +} defm vfneg_v : RVVPseudoVFUnaryBuiltin<"vfsgnjn", "xfd">; defm vfabs_v : RVVPseudoVFUnaryBuiltin<"vfsgnjx", "xfd">; @@ -2005,6 +2022,7 @@ defm vslidedown : RVVSlideBuiltinSet; // 17.3.3. Vector Slide1up Instructions +let HasNoMaskPassThru = true in { defm vslide1up : RVVSlideOneBuiltinSet; defm vfslide1up : RVVFloatingBinVFBuiltinSet; @@ -2027,6 +2045,7 @@ [["vx", "Uv", "UvUvz"]]>; defm vrgatherei16 : RVVOutBuiltinSet<"vrgatherei16_vv", "csil", [["vv", "Uv", "UvUv(Log2EEW:4)Uv"]]>; +} // 17.5. 
Vector Compress Instruction let HasMask = false, HasPolicy = false, diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -162,6 +162,7 @@ bool IsMask; bool HasVL; bool HasPolicy; + bool HasNoMaskPassThru; bool HasNoMaskedOverloaded; bool HasAutoDef; // There is automiatic definition in header std::string ManualCodegen; @@ -177,8 +178,8 @@ RVVIntrinsic(StringRef Name, StringRef Suffix, StringRef MangledName, StringRef MangledSuffix, StringRef IRName, bool IsMask, bool HasMaskedOffOperand, bool HasVL, bool HasPolicy, - bool HasNoMaskedOverloaded, bool HasAutoDef, - StringRef ManualCodegen, const RVVTypes &Types, + bool HasNoMaskPassThru, bool HasNoMaskedOverloaded, + bool HasAutoDef, StringRef ManualCodegen, const RVVTypes &Types, const std::vector &IntrinsicTypes, const std::vector &RequiredFeatures, unsigned NF); ~RVVIntrinsic() = default; @@ -188,6 +189,7 @@ StringRef getMangledName() const { return MangledName; } bool hasVL() const { return HasVL; } bool hasPolicy() const { return HasPolicy; } + bool hasNoMaskPassThru() const { return HasNoMaskPassThru; } bool hasNoMaskedOverloaded() const { return HasNoMaskedOverloaded; } bool hasManualCodegen() const { return !ManualCodegen.empty(); } bool hasAutoDef() const { return HasAutoDef; } @@ -770,12 +772,14 @@ StringRef NewMangledName, StringRef MangledSuffix, StringRef IRName, bool IsMask, bool HasMaskedOffOperand, bool HasVL, bool HasPolicy, - bool HasNoMaskedOverloaded, bool HasAutoDef, - StringRef ManualCodegen, const RVVTypes &OutInTypes, + bool HasNoMaskPassThru, bool HasNoMaskedOverloaded, + bool HasAutoDef, StringRef ManualCodegen, + const RVVTypes &OutInTypes, const std::vector &NewIntrinsicTypes, const std::vector &RequiredFeatures, unsigned NF) : IRName(IRName), IsMask(IsMask), HasVL(HasVL), HasPolicy(HasPolicy), + HasNoMaskPassThru(HasNoMaskPassThru), HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef), ManualCodegen(ManualCodegen.str()), NF(NF) { @@ -823,7 +827,7 @@ // IntrinsicTypes is nonmasked version index. Need to update it // if there is maskedoff operand (It is always in first operand). 
IntrinsicTypes = NewIntrinsicTypes; - if (IsMask && HasMaskedOffOperand) { + if ((IsMask && HasMaskedOffOperand) || (!IsMask && HasNoMaskPassThru)) { for (auto &I : IntrinsicTypes) { if (I >= 0) I += NF; @@ -860,6 +864,9 @@ } else { OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());\n"; } + } else if (hasNoMaskPassThru()) { + OS << " Ops.push_back(llvm::UndefValue::get(ResultType));\n"; + OS << " std::rotate(Ops.rbegin(), Ops.rbegin() + 1, Ops.rend());\n"; } OS << " IntrinsicTypes = {"; @@ -1107,6 +1114,8 @@ PrintFatalError("Builtin with same name has different HasPolicy"); else if (P.first->second->hasPolicy() != Def->hasPolicy()) PrintFatalError("Builtin with same name has different HasPolicy"); + else if (P.first->second->hasNoMaskPassThru() != Def->hasNoMaskPassThru()) + PrintFatalError("Builtin with same name has different HasNoMaskPassThru"); else if (P.first->second->getIntrinsicTypes() != Def->getIntrinsicTypes()) PrintFatalError("Builtin with same name has different IntrinsicTypes"); } @@ -1154,6 +1163,7 @@ bool HasMaskedOffOperand = R->getValueAsBit("HasMaskedOffOperand"); bool HasVL = R->getValueAsBit("HasVL"); bool HasPolicy = R->getValueAsBit("HasPolicy"); + bool HasNoMaskPassThru = R->getValueAsBit("HasNoMaskPassThru"); bool HasNoMaskedOverloaded = R->getValueAsBit("HasNoMaskedOverloaded"); std::vector Log2LMULList = R->getValueAsListOfInts("Log2LMUL"); StringRef ManualCodegen = R->getValueAsString("ManualCodegen"); @@ -1228,8 +1238,8 @@ Out.push_back(std::make_unique( Name, SuffixStr, MangledName, MangledSuffixStr, IRName, /*IsMask=*/false, /*HasMaskedOffOperand=*/false, HasVL, HasPolicy, - HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, Types.getValue(), - IntrinsicTypes, RequiredFeatures, NF)); + HasNoMaskPassThru, HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, + Types.getValue(), IntrinsicTypes, RequiredFeatures, NF)); if (HasMask) { // Create a mask intrinsic Optional MaskTypes = @@ -1237,8 +1247,9 @@ Out.push_back(std::make_unique( Name, SuffixStr, MangledName, MangledSuffixStr, IRNameMask, /*IsMask=*/true, HasMaskedOffOperand, HasVL, HasPolicy, - HasNoMaskedOverloaded, HasAutoDef, ManualCodegenMask, - MaskTypes.getValue(), IntrinsicTypes, RequiredFeatures, NF)); + HasNoMaskPassThru, HasNoMaskedOverloaded, HasAutoDef, + ManualCodegenMask, MaskTypes.getValue(), IntrinsicTypes, + RequiredFeatures, NF)); } } // end for Log2LMULList } // end for TypeRange diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -383,12 +383,13 @@ let VLOperand = 2; } // For destination vector type is the same as first and second source vector. - // Input: (vector_in, int_vector_in, vl) + // Input: (passthru, vector_in, int_vector_in, vl) class RISCVRGatherVVNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first and second source vector. 
// Input: (vector_in, vector_in, int_vector_in, vl, ta) @@ -400,13 +401,14 @@ [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 4; } - // Input: (vector_in, int16_vector_in, vl) + // Input: (passthru, vector_in, int16_vector_in, vl) class RISCVRGatherEI16VVNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first and second source vector. // Input: (vector_in, vector_in, int16_vector_in, vl, ta) @@ -421,12 +423,13 @@ } // For destination vector type is the same as first source vector, and the // second operand is XLen. - // Input: (vector_in, xlen_in, vl) + // Input: (passthru, vector_in, xlen_in, vl) class RISCVGatherVXNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_anyint_ty, LLVMMatchType<1>], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, + LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // Second operand is XLen. @@ -440,13 +443,14 @@ let VLOperand = 4; } // For destination vector type is the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryAAXNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let SplatOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) @@ -461,12 +465,13 @@ } // For destination vector type is the same as first source vector. The // second source operand must match the destination type or be an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryAAShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. @@ -480,13 +485,14 @@ let VLOperand = 4; } // For destination vector type is NOT the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryABXNoMask : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let SplatOperand = 2; + let VLOperand = 3; } // For destination vector type is NOT the same as first source vector (with mask). // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) @@ -501,12 +507,13 @@ } // For destination vector type is NOT the same as first source vector. The // second source operand must match the destination type or be an XLen scalar. 
- // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryABShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is NOT the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. @@ -595,13 +602,14 @@ } // For Saturating binary operations. // The destination vector type is the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVSaturatingBinaryAAXNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let SplatOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. @@ -618,12 +626,13 @@ // For Saturating binary operations. // The destination vector type is the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVSaturatingBinaryAAShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. @@ -640,12 +649,13 @@ // For Saturating binary operations. // The destination vector type is NOT the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVSaturatingBinaryABShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is NOT the same as first source vector (with mask). diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -155,9 +155,9 @@ // and the fifth the VL. VSLIDEUP_VL, VSLIDEDOWN_VL, - // Matches the semantics of vslide1up/slide1down. The first operand is the - // source vector, the second is the XLenVT scalar value. The third and fourth - // operands are the mask and VL operands. + // Matches the semantics of vslide1up/slide1down. The first operand is + // passthru operand, the second is source vector, third is the XLenVT scalar + // value. The fourth and fifth operands are the mask and VL operands. 
VSLIDE1UP_VL, VSLIDE1DOWN_VL, // Matches the semantics of the vid.v instruction, with a mask and VL diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "RISCVISelLowering.h" +#include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVMatInt.h" #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" @@ -4475,10 +4476,12 @@ ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero, InsertI64VL); // First slide in the hi value, then the lo in underneath it. - ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, - ValHi, I32Mask, InsertI64VL); - ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, - ValLo, I32Mask, InsertI64VL); + ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, + DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, + I32Mask, InsertI64VL); + ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, + DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo, + I32Mask, InsertI64VL); // Bitcast back to the right container type. ValInVec = DAG.getBitcast(ContainerVT, ValInVec); } @@ -4774,8 +4777,7 @@ // We need to special case these when the scalar is larger than XLen. unsigned NumOps = Op.getNumOperands(); bool IsMasked = NumOps == 7; - unsigned OpOffset = IsMasked ? 1 : 0; - SDValue Scalar = Op.getOperand(2 + OpOffset); + SDValue Scalar = Op.getOperand(3); if (Scalar.getValueType().bitsLE(XLenVT)) break; @@ -4790,7 +4792,7 @@ // Convert the vector source to the equivalent nxvXi32 vector. MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); - SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset)); + SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(2)); SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, DAG.getConstant(0, DL, XLenVT)); @@ -4807,17 +4809,34 @@ // Shift the two scalar parts in using SEW=32 slide1up/slide1down // instructions. 
- if (IntNo == Intrinsic::riscv_vslide1up || - IntNo == Intrinsic::riscv_vslide1up_mask) { - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi, - I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo, - I32Mask, I32VL); + SDValue Passthru = DAG.getBitcast(I32VT, Op.getOperand(1)); + if (!IsMasked) { + if (IntNo == Intrinsic::riscv_vslide1up) { + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, + ScalarHi, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, + ScalarLo, I32Mask, I32VL); + } else { + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, + ScalarLo, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, + ScalarHi, I32Mask, I32VL); + } } else { - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo, - I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi, - I32Mask, I32VL); + // TODO Those VSLIDE1 could be TAMA because we use vmerge to select + // maskedoff + SDValue Undef = DAG.getUNDEF(I32VT); + if (IntNo == Intrinsic::riscv_vslide1up_mask) { + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Undef, Vec, + ScalarHi, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Undef, Vec, + ScalarLo, I32Mask, I32VL); + } else { + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Undef, Vec, + ScalarLo, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Undef, Vec, + ScalarHi, I32Mask, I32VL); + } } // Convert back to nxvXi64. @@ -4825,11 +4844,21 @@ if (!IsMasked) return Vec; - // Apply mask after the operation. SDValue Mask = Op.getOperand(NumOps - 3); SDValue MaskedOff = Op.getOperand(1); - return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL); + // Assume Policy operand is the last operand. + uint64_t Policy = Op.getConstantOperandVal(NumOps - 1); + // We don't need to select maskedoff if it's undef. + if (MaskedOff.isUndef()) + return Vec; + // TAMU + if (Policy == RISCVII::TAIL_AGNOSTIC) + return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, + VL); + // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. + // It's fine because vmerge does not care mask policy. + return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, VL); } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -996,6 +996,24 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoBinaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + // Special version of VPseudoBinaryNoMask where we pretend the first source is // tied to the destination. // This allows maskedoff and rs2 to be the same register. 
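(Illustrative aside, not part of the patch.) A minimal LLVM IR sketch of how the new passthru operand on the unmasked intrinsics is meant to drive pseudo selection, assuming RV64 (i64 vl), the existing nxv1i8 vadd intrinsic, and hypothetical function names vadd_ta/vadd_tu: an undef passthru should still match the tail-agnostic pattern (VPatBinaryNoMaskTA) and the existing pseudo, while a real merge value should match VPatBinaryNoMaskTU and select the new _TU pseudo with a tail-undisturbed vsetvli, as the unmasked-tu.ll tests later in this patch check.

declare <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,  ; passthru (new operand)
  <vscale x 1 x i8>,  ; vs2
  <vscale x 1 x i8>,  ; vs1
  i64)                ; vl

; undef passthru: tail agnostic, expected to select PseudoVADD_VV_MF8
define <vscale x 1 x i8> @vadd_ta(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i64 %vl) {
  %r = call <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
              <vscale x 1 x i8> undef, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i64 %vl)
  ret <vscale x 1 x i8> %r
}

; non-undef passthru: tail undisturbed, expected to select PseudoVADD_VV_MF8_TU
; (roughly: vsetvli zero, a0, e8, mf8, tu, mu ; vadd.vv v8, v9, v10)
define <vscale x 1 x i8> @vadd_tu(<vscale x 1 x i8> %merge, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i64 %vl) {
  %r = call <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
              <vscale x 1 x i8> %merge, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i64 %vl)
  ret <vscale x 1 x i8> %r
}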
@@ -1017,6 +1035,25 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoTiedBinaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, + Op2Class:$rs1, + AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 0; // Merge is also rs2. + let HasDummyMask = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoIStoreNoMask LMUL, bit Ordered>: Pseudo<(outs), @@ -1652,6 +1689,8 @@ let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoBinaryNoMask; + def "_" # MInfo.MX # "_TU" : VPseudoBinaryNoMaskTU; def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy, RISCVMaskedPseudo; @@ -1681,6 +1720,8 @@ let VLMul = lmul.value in { def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask; + def "_" # lmul.MX # "_" # emul.MX # "_TU": VPseudoBinaryNoMaskTU; def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskPolicy; } @@ -1693,6 +1734,8 @@ let VLMul = MInfo.value in { def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask; + def "_" # MInfo.MX # "_TIED_TU": VPseudoTiedBinaryNoMaskTU; def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask; } @@ -2820,14 +2863,14 @@ (mask_type VR:$rs2), GPR:$vl, sew)>; -class VPatBinaryNoMask : +class VPatBinaryM : Pat<(result_type (!cast(intrinsic_name) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), @@ -2837,6 +2880,44 @@ (op2_type op2_kind:$rs2), GPR:$vl, sew)>; +class VPatBinaryNoMaskTA : + Pat<(result_type (!cast(intrinsic_name) + (result_type (undef)), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + +class VPatBinaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TU") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + // Same as above but source operands are swapped. 
class VPatBinaryNoMaskSwapped : Pat<(result_type (!cast(intrinsic_name) + (result_type (undef)), (result_type result_reg_class:$rs1), (op2_type op2_kind:$rs2), VLOpFrag)), @@ -2938,6 +3020,23 @@ (op2_type op2_kind:$rs2), GPR:$vl, sew)>; +class VPatTiedBinaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$merge), + (result_type result_reg_class:$merge), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TIED_TU") + (result_type result_reg_class:$merge), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + class VPatTiedBinaryMask; } -multiclass VPatBinary { - def : VPatBinaryNoMask; + def : VPatBinaryM; def : VPatBinaryMask; @@ -3176,8 +3275,10 @@ VReg op1_reg_class, DAGOperand op2_kind> { - def : VPatBinaryNoMask; + def : VPatBinaryNoMaskTA; + def : VPatBinaryNoMaskTU; def : VPatBinaryMaskTA; @@ -3349,9 +3450,9 @@ multiclass VPatBinaryM_MM { foreach mti = AllMasks in - def : VPatBinaryNoMask; + def : VPatBinaryM; } multiclass VPatBinaryW_VV; - let AddedComplexity = 1 in + def : VPatBinaryNoMaskTU; + let AddedComplexity = 1 in { + def : VPatTiedBinaryNoMaskTU; def : VPatTiedBinaryMask; + } def : VPatBinaryMaskTA vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryM; } multiclass VPatBinarySwappedM_VV vtilist> { foreach vti = vtilist in { defvar kind = "V"#vti.ScalarSuffix; - defm : VPatBinary; + defm : VPatBinaryM; } } multiclass VPatBinaryM_VI vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryM; } multiclass VPatBinaryV_VV_VX_VI("PseudoVSUB_VV_"#vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$merge), + (vti.Vector vti.RegClass:$rs2), + (vti.Vector vti.RegClass:$rs1), + VLOpFrag)), + (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_TU") + vti.RegClass:$merge, + vti.RegClass:$rs1, + vti.RegClass:$rs2, + GPR:$vl, + vti.Log2SEW)>; def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), @@ -4098,7 +4217,8 @@ (XLenVT timm:$policy))>; // Match VSUB with a small immediate to vadd.vi by negating the immediate. - def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)), + (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), VLOpFrag)), (!cast("PseudoVADD_VI_"#vti.LMul.MX) vti.RegClass:$rs1, @@ -4684,7 +4804,8 @@ foreach vti = AllIntegerVectors in { // Emit shift by 1 as an add since it might be faster. 
- def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), (XLenVT 1), VLOpFrag)), (!cast("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs1, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1643,9 +1643,10 @@ SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT> ]>; -def SDTRVVSlide1 : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisInt<0>, SDTCisVT<2, XLenVT>, - SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT> +def SDTRVVSlide1 : SDTypeProfile<1, 5, [ + SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisInt<0>, + SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, + SDTCisVT<5, XLenVT> ]>; def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>; @@ -1660,16 +1661,30 @@ VLOpFrag)), (!cast("PseudoVID_V_"#vti.LMul.MX) GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX) vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), + (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX#"_TU") + vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask true_mask), + VLOpFrag)), (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX) vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask true_mask), + VLOpFrag)), + (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX#"_TU") + vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; } foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ +; RUN: < %s | FileCheck %s + +declare @llvm.riscv.vslide1down.mask.nxv1i64.i64( + , + , + i64, + , + i32, + i32); + +define @intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + 
%3, + i32 %4, i32 0) + + ret %a +} + +define @intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4, i32 1) + + ret %a +} + + +; Fallback vslide1 to mask undisturbed until InsertVSETVLI supports mask agnostic. +define @intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4, i32 2) + + ret %a +} + +; Fallback vslide1 to mask undisturbed until InsertVSETVLI supports mask agnostic. +define @intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + undef, + %0, + i64 %1, + %2, + i32 %3, i32 3) + + ret %a +} + +define @intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + undef, + %0, + i64 %1, + undef, + i32 %2, i32 3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll --- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=RV64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefix=RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefix=RV64 declare @llvm.riscv.vle.nxv1i8( , @@ -118,3 +118,2088 @@ ret %a } + +declare @llvm.riscv.vaadd.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vaadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: 
intrinsic_vaadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vaadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vaadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vaadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vaadd.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vaaddu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vaaddu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vaaddu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vaaddu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vaaddu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vaaddu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vaaddu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vadd.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} +declare @llvm.riscv.vand.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vand.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vand.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vand.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vasub.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vasub_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vasub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vasub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vasub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vasub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vasub.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vasubu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vasubu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vasubu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vasubu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vasubu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vasubu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vasubu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + 
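(Illustrative aside, not part of the patch.) Prepending the passthru also renumbers the remaining operands in IntrinsicsRISCV.td: for RISCVBinaryAAXNoMask, SplatOperand moves from 1 to 2 and VLOperand from 2 to 3. A hedged RV64 sketch using the existing vadd.vx intrinsic and a hypothetical function name vadd_vx_tu:

declare <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.i8(
  <vscale x 1 x i8>, <vscale x 1 x i8>, i8, i64)

define <vscale x 1 x i8> @vadd_vx_tu(<vscale x 1 x i8> %merge, <vscale x 1 x i8> %vs2, i8 %rs1, i64 %vl) {
  ; operand 0: passthru (new)   operand 2: scalar splat (SplatOperand = 2)
  ; operand 1: vs2              operand 3: vl           (VLOperand   = 3)
  %r = call <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.i8(
              <vscale x 1 x i8> %merge, <vscale x 1 x i8> %vs2, i8 %rs1, i64 %vl)
  ret <vscale x 1 x i8> %r
}

With a non-undef %merge this is expected to select the new _TU pseudo, i.e. something like vsetvli zero, a1, e8, mf8, tu, mu followed by vadd.vx v8, v9, a0, matching the tu codegen exercised by the surrounding tests.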
+declare @llvm.riscv.vdiv.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vdiv_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vdiv_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vdiv.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vdiv_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vdiv.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vdiv.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vdivu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vdivu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vdivu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vdivu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vdivu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vdivu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vdivu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfadd.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfdiv.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfdiv.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfdiv.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfdiv.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfmax.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfmax.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfmax.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfmin.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: 
# %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfmin.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfmin.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfmul.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfmul.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfmul.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfrdiv.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfrdiv.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfrdiv.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfrdiv.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsgnjn.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsgnjn.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsgnjx.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsgnjx.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfrsub.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, 
iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfrsub.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfrsub.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfrsub.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfslide1down.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfslide1down.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfslide1down.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfslide1up.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfslide1up.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfslide1up.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwsub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwsub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwsub.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwsub.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( + , + , + , + iXLen); + +define @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vl4re16.v v24, (a0) +; RV32-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; RV32-NEXT: vfwsub.wv v8, v16, v24 +; RV32-NEXT: ret +; +; 
RV64-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vl4re16.v v24, (a0) +; RV64-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; RV64-NEXT: vfwsub.wv v8, v16, v24 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwmul.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwadd.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwadd.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsub.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + + +declare @llvm.riscv.vslide1down.nxv1i64( + , + , + i64, + iXLen); + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, mu +; RV32-NEXT: vmv1r.v v10, v8 +; RV32-NEXT: vslide1down.vx v10, v9, a0 +; RV32-NEXT: vslide1down.vx v8, v10, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: 
vslide1down.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, mu +; RV32-NEXT: vmv1r.v v10, v8 +; RV32-NEXT: vslide1up.vx v10, v9, a1 +; RV32-NEXT: vslide1up.vx v8, v10, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vslide1up.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmax.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmax_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmax_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmax.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmax_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmax.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmax.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmaxu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmaxu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmaxu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmaxu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmaxu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmaxu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmaxu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmin.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmin_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmin_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmin.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmin_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmin.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmin.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vminu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vminu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vminu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vminu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vminu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vminu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vminu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmul.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmul_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: 
intrinsic_vmul_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmul.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmul_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmulh.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmulh_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmulh_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmulh.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmulh_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmulh.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmulh.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmulhsu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmulhsu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmulhsu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmulhsu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmulhsu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmulhsu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmulhsu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmulhu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmulhu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmulhu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmulhu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmulhu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmulhu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmulhu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vnclip.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vnclip.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vnclipu.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vnclipu.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call 
@llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vnsra.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vnsra.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vnsrl.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vnsrl.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vor.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vor.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vor.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vor.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vrem.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vrem_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vrem_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vrem.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vrem_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vrem.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vrem.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vremu.nxv1i8.nxv1i8( + , + , + , + iXLen); +declare @llvm.riscv.vrgather.vv.nxv1i8.i32( + , + , + , + iXLen); + +define @intrinsic_vrgather_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vrgather_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vrgather.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vrgather_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vrgather.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vrgather.vv.nxv1i8.i32( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vrgather.vx.nxv1i8( + , + , + iXLen, + iXLen); + +define @intrinsic_vrgather_vx_nxv1i8_nxv1i8( %0, %1, iXLen %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vrgather_vx_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a1, 
e8, mf8, tu, mu +; RV32-NEXT: vrgather.vx v8, v9, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vrgather_vx_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; RV64-NEXT: vrgather.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vrgather.vx.nxv1i8( + %0, + %1, + iXLen %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vrgatherei16.vv.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vrgatherei16_vv_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vrgatherei16_vv_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vrgatherei16.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vrgatherei16_vv_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vrgatherei16.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vrgatherei16.vv.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vrsub.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu +; RV32-NEXT: vsub.vv v8, v10, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vrsub.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vrsub.nxv1i64.i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vsadd.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu +; RV32-NEXT: vsadd.vv v8, v9, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vsadd.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsadd.nxv1i64.i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vsaddu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vsaddu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vsaddu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsaddu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vsll.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vsll_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsll_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # 
%bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vsll.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsll_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vsll.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsll.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vsmul.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vsmul.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vsmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsmul.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vsmul.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu +; RV32-NEXT: vsmul.vv v8, v9, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vsmul.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsmul.nxv1i64.i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vsra.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vsra_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsra_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vsra.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsra_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vsra.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsra.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} +declare @llvm.riscv.vsrl.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vsrl_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsrl_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vsrl.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsrl_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vsrl.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsrl.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vssra.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vssra_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vssra_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vssra.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssra_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, 
mf8, tu, mu +; RV64-NEXT: vssra.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vssra.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vssrl.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vssrl_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vssrl_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vssrl.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssrl_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vssrl.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vssrl.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vssub.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vssub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vssub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vssub.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vssubu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vssubu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vssubu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vssubu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vssub.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu +; RV32-NEXT: vssub.vv v8, v9, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vssub.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vssub.nxv1i64.i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vssubu.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu +; RV32-NEXT: vssubu.vv v8, v9, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; 
RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vssubu.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vssubu.nxv1i64.i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vsub.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vsub_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vsub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vsub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vsub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vsub.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwadd_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwadd_vv_nxv1i16_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwadd_vv_nxv1i16_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwadd.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwadd.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwaddu_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwaddu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwaddu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwaddu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwaddu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwmul_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwmul_vv_nxv1i16_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwmul.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwmul_vv_nxv1i16_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwmulu_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: 
intrinsic_vwmulu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwmulu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwmulu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwmulu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwmulsu_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwmulsu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwmulsu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwmulsu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwmulsu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwsub_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwsub_vv_nxv1i16_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwsub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwsub_vv_nxv1i16_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwsub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwsub.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwsub.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +define @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8_tied( %0, %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8_tied: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwsub.wv v8, v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8_tied: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwsub.wv v8, v8, v9 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( + %0, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vwsubu_vv_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vwsubu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vwsubu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vwsubu_vv_nxv1i16_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vwsubu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call 
    <vscale x 1 x i16> @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8(
+    <vscale x 1 x i16> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i8> %2,
+    iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
+  <vscale x 1 x i16>,
+  <vscale x 1 x i16>,
+  <vscale x 1 x i8>,
+  iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwsubu.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwsubu.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
+    <vscale x 1 x i16> %0,
+    <vscale x 1 x i16> %1,
+    <vscale x 1 x i8> %2,
+    iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vxor.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vxor.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i8> %2,
+    iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
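
For readers cross-checking the updated tests, below is a minimal fully-typed sketch of one of the unmasked tests above (vmax.vv at SEW=8, LMUL=1/8). It is illustrative only: the <vscale x 1 x i8> element types are inferred from the nxv1i8 name mangling, the function and value names (@sketch_vmax_vv_nxv1i8_tu, %merge, %op1, %op2, %vl) are made up for clarity, and iXLen is the placeholder these tests use for the XLEN integer type (i32 on RV32, i64 on RV64). The leading vector operand is the passthru (merge) value; the tests in this file pass a live value there, and the corresponding RV32/RV64 checks expect "vsetvli zero, a0, e8, mf8, tu, mu" followed by "vmax.vv v8, v9, v10".

declare <vscale x 1 x i8> @llvm.riscv.vmax.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,   ; passthru (merge) operand
  <vscale x 1 x i8>,   ; op1
  <vscale x 1 x i8>,   ; op2
  iXLen);              ; vl

define <vscale x 1 x i8> @sketch_vmax_vv_nxv1i8_tu(<vscale x 1 x i8> %merge, <vscale x 1 x i8> %op1, <vscale x 1 x i8> %op2, iXLen %vl) nounwind {
entry:
  ; Passing a live %merge value as the first operand is the case these tests exercise:
  ; the checked vsetvli sequences all request tu (tail undisturbed), mu.
  %a = call <vscale x 1 x i8> @llvm.riscv.vmax.nxv1i8.nxv1i8(
    <vscale x 1 x i8> %merge,
    <vscale x 1 x i8> %op1,
    <vscale x 1 x i8> %op2,
    iXLen %vl)
  ret <vscale x 1 x i8> %a
}

An undef passthru would presumably allow a tail-agnostic policy instead, but that case is not exercised by the tests in this file.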