diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -173,20 +173,13 @@ // This builtin has a granted vector length parameter. bit HasVL = true; - // There are several cases for specifying tail policy. - // 1. Add tail policy argument to masked intrinsics. It may have the maskedoff - // argument or not. - // * Have the maskedoff argument: (HasPolicy = true, HasMaskedOffOperand = true) - // Ex: vadd_vv_i8m1_mt(mask, maskedoff, op1, op2, vl, ta); - // * Do not have the maskedoff argument: (HasPolicy = true, HasMaskedOffOperand = false) - // Ex: vmacc_vv_i8m1_mt(mask, vd, vs1, vs2, vl, ta); - // 2. Add dest argument for no mask intrinsics. (TODO) - // Ex: vmv_v_x_i8m1_t(dest, src, vl); - // 3. Always tail agnostic. (HasPolicy = false) - // Ex: vmseq_vv_i8m1_b8_m(mask, maskedoff, op1, op2, vl); - // The tail policy argument is located at the last position. + // The masked intrinsic IR have the policy operand. + // The policy argument is located at the last position. bit HasPolicy = true; + // The nomask intrinsic IR have the passthru operand. + bit HasNoMaskPolicy = false; + // This builtin supports non-masked function overloading api. // All masked operations support overloading api. bit HasNoMaskedOverloaded = true; @@ -1290,6 +1283,8 @@ cast(ResultType)->getElementType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[1])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1321,6 +1316,8 @@ Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getAllOnesValue(IntrinsicTypes[1])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1369,6 +1366,8 @@ IntrinsicTypes = {ResultType, Ops[0]->getType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, Ops[0]); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1402,6 +1401,8 @@ cast(Ops[0]->getType())->getElementType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[2])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1438,6 +1439,8 @@ Ops[1]->getType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[2])); + // insert undef passthru + Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], @@ -1583,15 +1586,18 @@ // 12. Vector Integer Arithmetic Instructions // 12.1. Vector Single-Width Integer Add and Subtract +let HasNoMaskPolicy = true in { defm vadd : RVVIntBinBuiltinSet; defm vsub : RVVIntBinBuiltinSet; defm vrsub : RVVOutOp1BuiltinSet<"vrsub", "csil", [["vx", "v", "vve"], ["vx", "Uv", "UvUvUe"]]>; +} defm vneg_v : RVVPseudoUnaryBuiltin<"vrsub", "csil">; // 12.2. 
Vector Widening Integer Add/Subtract // Widening unsigned integer add/subtract, 2*SEW = SEW +/- SEW +let HasNoMaskPolicy = true in { defm vwaddu : RVVUnsignedWidenBinBuiltinSet; defm vwsubu : RVVUnsignedWidenBinBuiltinSet; // Widening signed integer add/subtract, 2*SEW = SEW +/- SEW @@ -1603,6 +1609,7 @@ // Widening signed integer add/subtract, 2*SEW = 2*SEW +/- SEW defm vwadd : RVVSignedWidenOp0BinBuiltinSet; defm vwsub : RVVSignedWidenOp0BinBuiltinSet; +} defm vwcvtu_x_x_v : RVVPseudoVWCVTBuiltin<"vwaddu", "vwcvtu_x", "csi", [["Uw", "UwUv"]]>; defm vwcvt_x_x_v : RVVPseudoVWCVTBuiltin<"vwadd", "vwcvt_x", "csi", @@ -1633,12 +1640,15 @@ } // 12.5. Vector Bitwise Logical Instructions +let HasNoMaskPolicy = true in { defm vand : RVVIntBinBuiltinSet; defm vxor : RVVIntBinBuiltinSet; defm vor : RVVIntBinBuiltinSet; +} defm vnot_v : RVVPseudoVNotBuiltin<"vxor", "csil">; // 12.6. Vector Single-Width Bit Shift Instructions +let HasNoMaskPolicy = true in { defm vsll : RVVShiftBuiltinSet; defm vsrl : RVVUnsignedShiftBuiltinSet; defm vsra : RVVSignedShiftBuiltinSet; @@ -1646,6 +1656,7 @@ // 12.7. Vector Narrowing Integer Right Shift Instructions defm vnsrl : RVVUnsignedNShiftBuiltinSet; defm vnsra : RVVSignedNShiftBuiltinSet; +} defm vncvt_x_x_w : RVVPseudoVNCVTBuiltin<"vnsrl", "vncvt_x", "csi", [["v", "vw"], ["Uv", "UvUw"]]>; @@ -1665,6 +1676,7 @@ } // 12.9. Vector Integer Min/Max Instructions +let HasNoMaskPolicy = true in { defm vminu : RVVUnsignedBinBuiltinSet; defm vmin : RVVSignedBinBuiltinSet; defm vmaxu : RVVUnsignedBinBuiltinSet; @@ -1685,9 +1697,10 @@ defm vdiv : RVVSignedBinBuiltinSet; defm vremu : RVVUnsignedBinBuiltinSet; defm vrem : RVVSignedBinBuiltinSet; +} // 12.12. Vector Widening Integer Multiply Instructions -let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { +let Log2LMUL = [-3, -2, -1, 0, 1, 2], HasNoMaskPolicy = true in { defm vwmul : RVVOutOp0Op1BuiltinSet<"vwmul", "csi", [["vv", "w", "wvv"], ["vx", "w", "wve"]]>; @@ -1753,6 +1766,7 @@ // 13. Vector Fixed-Point Arithmetic Instructions // 13.1. Vector Single-Width Saturating Add and Subtract +let HasNoMaskPolicy = true in { defm vsaddu : RVVUnsignedBinBuiltinSet; defm vsadd : RVVSignedBinBuiltinSet; defm vssubu : RVVUnsignedBinBuiltinSet; @@ -1802,6 +1816,7 @@ [["vv", "w", "wvv"], ["vf", "w", "wve"]]>; } +} // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions let HasPolicy = false in { @@ -1831,6 +1846,7 @@ def vfrec7 : RVVFloatingUnaryVVBuiltin; // 14.11. Vector Floating-Point MIN/MAX Instructions +let HasNoMaskPolicy = true in { defm vfmin : RVVFloatingBinBuiltinSet; defm vfmax : RVVFloatingBinBuiltinSet; @@ -1838,6 +1854,7 @@ defm vfsgnj : RVVFloatingBinBuiltinSet; defm vfsgnjn : RVVFloatingBinBuiltinSet; defm vfsgnjx : RVVFloatingBinBuiltinSet; +} defm vfneg_v : RVVPseudoVFUnaryBuiltin<"vfsgnjn", "xfd">; defm vfabs_v : RVVPseudoVFUnaryBuiltin<"vfsgnjx", "xfd">; @@ -2011,6 +2028,7 @@ } // 17.3.3. Vector Slide1up Instructions +let HasNoMaskPolicy = true in { defm vslide1up : RVVSlideOneBuiltinSet; defm vfslide1up : RVVFloatingBinVFBuiltinSet; @@ -2033,6 +2051,7 @@ [["vx", "Uv", "UvUvz"]]>; defm vrgatherei16 : RVVOutBuiltinSet<"vrgatherei16_vv", "csil", [["vv", "Uv", "UvUv(Log2EEW:4)Uv"]]>; +} // 17.5. 
Vector Compress Instruction let HasMask = false, HasPolicy = false, diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -162,6 +162,7 @@ bool IsMask; bool HasVL; bool HasPolicy; + bool HasNoMaskPolicy; bool HasNoMaskedOverloaded; bool HasAutoDef; // There is automiatic definition in header std::string ManualCodegen; @@ -177,8 +178,8 @@ RVVIntrinsic(StringRef Name, StringRef Suffix, StringRef MangledName, StringRef MangledSuffix, StringRef IRName, bool IsMask, bool HasMaskedOffOperand, bool HasVL, bool HasPolicy, - bool HasNoMaskedOverloaded, bool HasAutoDef, - StringRef ManualCodegen, const RVVTypes &Types, + bool HasNoMaskPolicy, bool HasNoMaskedOverloaded, + bool HasAutoDef, StringRef ManualCodegen, const RVVTypes &Types, const std::vector &IntrinsicTypes, const std::vector &RequiredFeatures, unsigned NF); ~RVVIntrinsic() = default; @@ -188,6 +189,7 @@ StringRef getMangledName() const { return MangledName; } bool hasVL() const { return HasVL; } bool hasPolicy() const { return HasPolicy; } + bool hasNoMaskPolicy() const { return HasNoMaskPolicy; } bool hasNoMaskedOverloaded() const { return HasNoMaskedOverloaded; } bool hasManualCodegen() const { return !ManualCodegen.empty(); } bool hasAutoDef() const { return HasAutoDef; } @@ -766,16 +768,15 @@ //===----------------------------------------------------------------------===// // RVVIntrinsic implementation //===----------------------------------------------------------------------===// -RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix, - StringRef NewMangledName, StringRef MangledSuffix, - StringRef IRName, bool IsMask, - bool HasMaskedOffOperand, bool HasVL, bool HasPolicy, - bool HasNoMaskedOverloaded, bool HasAutoDef, - StringRef ManualCodegen, const RVVTypes &OutInTypes, - const std::vector &NewIntrinsicTypes, - const std::vector &RequiredFeatures, - unsigned NF) +RVVIntrinsic::RVVIntrinsic( + StringRef NewName, StringRef Suffix, StringRef NewMangledName, + StringRef MangledSuffix, StringRef IRName, bool IsMask, + bool HasMaskedOffOperand, bool HasVL, bool HasPolicy, bool HasNoMaskPolicy, + bool HasNoMaskedOverloaded, bool HasAutoDef, StringRef ManualCodegen, + const RVVTypes &OutInTypes, const std::vector &NewIntrinsicTypes, + const std::vector &RequiredFeatures, unsigned NF) : IRName(IRName), IsMask(IsMask), HasVL(HasVL), HasPolicy(HasPolicy), + HasNoMaskPolicy(HasNoMaskPolicy), HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef), ManualCodegen(ManualCodegen.str()), NF(NF) { @@ -823,7 +824,7 @@ // IntrinsicTypes is nonmasked version index. Need to update it // if there is maskedoff operand (It is always in first operand). 
IntrinsicTypes = NewIntrinsicTypes; - if (IsMask && HasMaskedOffOperand) { + if ((IsMask && HasMaskedOffOperand) || (!IsMask && HasNoMaskPolicy)) { for (auto &I : IntrinsicTypes) { if (I >= 0) I += NF; @@ -860,6 +861,9 @@ } else { OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());\n"; } + } else if (hasNoMaskPolicy()) { + OS << " Ops.push_back(llvm::UndefValue::get(ResultType));\n"; + OS << " std::rotate(Ops.rbegin(), Ops.rbegin() + 1, Ops.rend());\n"; } OS << " IntrinsicTypes = {"; @@ -1107,6 +1111,8 @@ PrintFatalError("Builtin with same name has different HasPolicy"); else if (P.first->second->hasPolicy() != Def->hasPolicy()) PrintFatalError("Builtin with same name has different HasPolicy"); + else if (P.first->second->hasNoMaskPolicy() != Def->hasNoMaskPolicy()) + PrintFatalError("Builtin with same name has different HasNoMaskPolicy"); else if (P.first->second->getIntrinsicTypes() != Def->getIntrinsicTypes()) PrintFatalError("Builtin with same name has different IntrinsicTypes"); } @@ -1154,6 +1160,7 @@ bool HasMaskedOffOperand = R->getValueAsBit("HasMaskedOffOperand"); bool HasVL = R->getValueAsBit("HasVL"); bool HasPolicy = R->getValueAsBit("HasPolicy"); + bool HasNoMaskPolicy = R->getValueAsBit("HasNoMaskPolicy"); bool HasNoMaskedOverloaded = R->getValueAsBit("HasNoMaskedOverloaded"); std::vector Log2LMULList = R->getValueAsListOfInts("Log2LMUL"); StringRef ManualCodegen = R->getValueAsString("ManualCodegen"); @@ -1228,8 +1235,8 @@ Out.push_back(std::make_unique( Name, SuffixStr, MangledName, MangledSuffixStr, IRName, /*IsMask=*/false, /*HasMaskedOffOperand=*/false, HasVL, HasPolicy, - HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, Types.getValue(), - IntrinsicTypes, RequiredFeatures, NF)); + HasNoMaskPolicy, HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, + Types.getValue(), IntrinsicTypes, RequiredFeatures, NF)); if (HasMask) { // Create a mask intrinsic Optional MaskTypes = @@ -1237,8 +1244,9 @@ Out.push_back(std::make_unique( Name, SuffixStr, MangledName, MangledSuffixStr, IRNameMask, /*IsMask=*/true, HasMaskedOffOperand, HasVL, HasPolicy, - HasNoMaskedOverloaded, HasAutoDef, ManualCodegenMask, - MaskTypes.getValue(), IntrinsicTypes, RequiredFeatures, NF)); + HasNoMaskPolicy, HasNoMaskedOverloaded, HasAutoDef, + ManualCodegenMask, MaskTypes.getValue(), IntrinsicTypes, + RequiredFeatures, NF)); } } // end for Log2LMULList } // end for TypeRange diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -383,12 +383,13 @@ let VLOperand = 2; } // For destination vector type is the same as first and second source vector. - // Input: (vector_in, int_vector_in, vl) + // Input: (passthru, vector_in, int_vector_in, vl) class RISCVRGatherVVNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first and second source vector. 
// Input: (vector_in, vector_in, int_vector_in, vl, ta) @@ -400,13 +401,14 @@ [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { let VLOperand = 4; } - // Input: (vector_in, int16_vector_in, vl) + // Input: (passthru, vector_in, int16_vector_in, vl) class RISCVRGatherEI16VVNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first and second source vector. // Input: (vector_in, vector_in, int16_vector_in, vl, ta) @@ -421,12 +423,13 @@ } // For destination vector type is the same as first source vector, and the // second operand is XLen. - // Input: (vector_in, xlen_in, vl) + // Input: (passthru, vector_in, xlen_in, vl) class RISCVGatherVXNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_anyint_ty, LLVMMatchType<1>], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, + LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // Second operand is XLen. @@ -440,13 +443,14 @@ let VLOperand = 4; } // For destination vector type is the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryAAXNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let SplatOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) @@ -461,12 +465,13 @@ } // For destination vector type is the same as first source vector. The // second source operand must match the destination type or be an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryAAShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. @@ -480,13 +485,14 @@ let VLOperand = 4; } // For destination vector type is NOT the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryABXNoMask : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let SplatOperand = 2; + let VLOperand = 3; } // For destination vector type is NOT the same as first source vector (with mask). // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) @@ -501,12 +507,13 @@ } // For destination vector type is NOT the same as first source vector. The // second source operand must match the destination type or be an XLen scalar. 
- // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVBinaryABShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For destination vector type is NOT the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. @@ -595,13 +602,14 @@ } // For Saturating binary operations. // The destination vector type is the same as first source vector. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVSaturatingBinaryAAXNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let SplatOperand = 1; - let VLOperand = 2; + let SplatOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. @@ -618,12 +626,13 @@ // For Saturating binary operations. // The destination vector type is the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVSaturatingBinaryAAShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. @@ -640,12 +649,13 @@ // For Saturating binary operations. // The destination vector type is NOT the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. - // Input: (vector_in, vector_in/scalar_in, vl) + // Input: (passthru, vector_in, vector_in/scalar_in, vl) class RISCVSaturatingBinaryABShiftNoMask : Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { - let VLOperand = 2; + let VLOperand = 3; } // For Saturating binary operations with mask. // The destination vector type is NOT the same as first source vector (with mask). diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -112,6 +112,7 @@ enum { TAIL_UNDISTURBED = 0, TAIL_AGNOSTIC = 1, + MASK_AGNOSTIC = 2, }; // Helper functions to read TSFlags. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -155,9 +155,9 @@ // and the fifth the VL. VSLIDEUP_VL, VSLIDEDOWN_VL, - // Matches the semantics of vslide1up/slide1down. The first operand is the - // source vector, the second is the XLenVT scalar value. The third and fourth - // operands are the mask and VL operands. 
+  // Matches the semantics of vslide1up/slide1down. The first operand is the
+  // passthru operand, the second is the source vector, and the third is the
+  // XLenVT scalar value. The fourth and fifth operands are the mask and VL
+  // operands.
   VSLIDE1UP_VL,
   VSLIDE1DOWN_VL,
   // Matches the semantics of the vid.v instruction, with a mask and VL
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "RISCVISelLowering.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
 #include "MCTargetDesc/RISCVMatInt.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
@@ -4382,10 +4383,12 @@
     ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                            InsertI64VL);
     // First slide in the hi value, then the lo in underneath it.
-    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
-                           ValHi, I32Mask, InsertI64VL);
-    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
-                           ValLo, I32Mask, InsertI64VL);
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
+                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
+                           I32Mask, InsertI64VL);
+    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
+                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo,
+                           I32Mask, InsertI64VL);
     // Bitcast back to the right container type.
     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
   }
@@ -4681,8 +4684,7 @@
     // We need to special case these when the scalar is larger than XLen.
     unsigned NumOps = Op.getNumOperands();
     bool IsMasked = NumOps == 7;
-    unsigned OpOffset = IsMasked ? 1 : 0;
-    SDValue Scalar = Op.getOperand(2 + OpOffset);
+    SDValue Scalar = Op.getOperand(3);
     if (Scalar.getValueType().bitsLE(XLenVT))
       break;
@@ -4697,7 +4699,7 @@
     // Convert the vector source to the equivalent nxvXi32 vector.
     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
-    SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
+    SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(2));
     SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
                                    DAG.getConstant(0, DL, XLenVT));
@@ -4714,17 +4716,33 @@
     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
     // instructions.
- if (IntNo == Intrinsic::riscv_vslide1up || - IntNo == Intrinsic::riscv_vslide1up_mask) { - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi, - I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo, - I32Mask, I32VL); + SDValue Passthru = DAG.getBitcast(I32VT, Op.getOperand(1)); + if (!IsMasked) { + if (IntNo == Intrinsic::riscv_vslide1up) { + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, + ScalarHi, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, + ScalarLo, I32Mask, I32VL); + } else { + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, + ScalarLo, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, + ScalarHi, I32Mask, I32VL); + } } else { - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo, - I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi, - I32Mask, I32VL); + // TODO Those VSLDIE1 could be TAMA because we use vmerge to select maskedoff + SDValue Undef = DAG.getUNDEF(I32VT); + if (IntNo == Intrinsic::riscv_vslide1up_mask) { + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Undef, Vec, + ScalarHi, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Undef, Vec, + ScalarLo, I32Mask, I32VL); + } else { + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Undef, Vec, + ScalarLo, I32Mask, I32VL); + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Undef, Vec, + ScalarHi, I32Mask, I32VL); + } } // Convert back to nxvXi64. @@ -4732,11 +4750,25 @@ if (!IsMasked) return Vec; - // Apply mask after the operation. SDValue Mask = Op.getOperand(NumOps - 3); SDValue MaskedOff = Op.getOperand(1); - return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL); + // Assume Policy operand is the last operand. + uint64_t Policy = Op.getConstantOperandVal(NumOps - 1); + // TAMA don't need to select value from maskedoff. + if (MaskedOff.isUndef()) { + assert(Policy == (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && + "Invalid policy, undef maskedoff need to have tail and mask " + "agnostic"); + return Vec; + } + // TAMU + if (Policy == RISCVII::TAIL_AGNOSTIC) + return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, + VL); + // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. + // It's fine because vmerge does not care mask policy. + return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, VL); } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -996,6 +996,24 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoBinaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + // Special version of VPseudoBinaryNoMask where we pretend the first source is // tied to the destination. // This allows maskedoff and rs2 to be the same register. 
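As a minimal IR sketch of what the new *_TU pseudos buy (written in the style of the tests added later in this patch, but not part of it; the function names and the i32 VL type, which assumes RV32, are illustrative): an unmasked intrinsic call whose passthru is a live value has to keep the tail elements and should therefore select the tail-undisturbed pseudo, while an undef passthru keeps selecting the existing tail-agnostic one.

; Live passthru: should select PseudoVADD_VV_*_TU, i.e. a "tu" vsetvli and
; "vadd.vv v8, v9, v10" with the result tied to the merge register.
define <vscale x 1 x i8> @vadd_vv_tu(<vscale x 1 x i8> %passthru, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i32 %vl) {
entry:
  %r = call <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> %passthru,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %b,
    i32 %vl)
  ret <vscale x 1 x i8> %r
}

; Undef passthru: the plain tail-agnostic pseudo is still used ("ta" vsetvli).
define <vscale x 1 x i8> @vadd_vv_ta(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i32 %vl) {
entry:
  %r = call <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> undef,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %b,
    i32 %vl)
  ret <vscale x 1 x i8> %r
}

declare <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)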
@@ -1017,6 +1035,25 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoTiedBinaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, + Op2Class:$rs1, + AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 0; // Merge is also rs2. + let HasDummyMask = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoIStoreNoMask LMUL, bit Ordered>: Pseudo<(outs), @@ -1652,6 +1689,8 @@ let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoBinaryNoMask; + def "_" # MInfo.MX # "_TU" : VPseudoBinaryNoMaskTU; def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskTA, RISCVMaskedPseudo; @@ -1681,6 +1720,8 @@ let VLMul = lmul.value in { def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask; + def "_" # lmul.MX # "_" # emul.MX # "_TU": VPseudoBinaryNoMaskTU; def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskTA; } @@ -1693,6 +1734,8 @@ let VLMul = MInfo.value in { def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask; + def "_" # MInfo.MX # "_TIED_TU": VPseudoTiedBinaryNoMaskTU; def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask; } @@ -2807,14 +2850,14 @@ (mask_type VR:$rs2), GPR:$vl, sew)>; -class VPatBinaryNoMask : +class VPatBinaryM : Pat<(result_type (!cast(intrinsic_name) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), @@ -2824,6 +2867,44 @@ (op2_type op2_kind:$rs2), GPR:$vl, sew)>; +class VPatBinaryNoMaskTA : + Pat<(result_type (!cast(intrinsic_name) + (result_type (undef)), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst) + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + +class VPatBinaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TU") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + // Same as above but source operands are swapped. 
class VPatBinaryNoMaskSwapped : Pat<(result_type (!cast(intrinsic_name) + (result_type (undef)), (result_type result_reg_class:$rs1), (op2_type op2_kind:$rs2), VLOpFrag)), @@ -2925,6 +3007,23 @@ (op2_type op2_kind:$rs2), GPR:$vl, sew)>; +class VPatTiedBinaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$merge), + (result_type result_reg_class:$merge), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TIED_TU") + (result_type result_reg_class:$merge), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + class VPatTiedBinaryMask; } -multiclass VPatBinary { - def : VPatBinaryNoMask; + def : VPatBinaryM; def : VPatBinaryMask; @@ -3138,8 +3237,10 @@ VReg op1_reg_class, DAGOperand op2_kind> { - def : VPatBinaryNoMask; + def : VPatBinaryNoMaskTA; + def : VPatBinaryNoMaskTU; def : VPatBinaryMaskTA; @@ -3311,9 +3412,9 @@ multiclass VPatBinaryM_MM { foreach mti = AllMasks in - def : VPatBinaryNoMask; + def : VPatBinaryM; } multiclass VPatBinaryW_VV; - let AddedComplexity = 1 in + def : VPatBinaryNoMaskTU; + let AddedComplexity = 1 in { + def : VPatTiedBinaryNoMaskTU; def : VPatTiedBinaryMask; + } def : VPatBinaryMaskTA vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryM; } multiclass VPatBinarySwappedM_VV vtilist> { foreach vti = vtilist in { defvar kind = "V"#vti.ScalarSuffix; - defm : VPatBinary; + defm : VPatBinaryM; } } multiclass VPatBinaryM_VI vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryM; } multiclass VPatBinaryV_VV_VX_VI("PseudoVSUB_VV_"#vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$merge), + (vti.Vector vti.RegClass:$rs2), + (vti.Vector vti.RegClass:$rs1), + VLOpFrag)), + (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_TU") + vti.RegClass:$merge, + vti.RegClass:$rs1, + vti.RegClass:$rs2, + GPR:$vl, + vti.Log2SEW)>; def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), @@ -4040,7 +4159,8 @@ (XLenVT timm:$policy))>; // Match VSUB with a small immediate to vadd.vi by negating the immediate. - def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)), + (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), VLOpFrag)), (!cast("PseudoVADD_VI_"#vti.LMul.MX) vti.RegClass:$rs1, @@ -4626,7 +4746,8 @@ foreach vti = AllIntegerVectors in { // Emit shift by 1 as an add since it might be faster. 
- def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), (XLenVT 1), VLOpFrag)), (!cast("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs1, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1592,9 +1592,10 @@ SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT> ]>; -def SDTRVVSlide1 : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisInt<0>, SDTCisVT<2, XLenVT>, - SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT> +def SDTRVVSlide1 : SDTypeProfile<1, 5, [ + SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisInt<0>, + SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, + SDTCisVT<5, XLenVT> ]>; def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>; @@ -1609,16 +1610,30 @@ VLOpFrag)), (!cast("PseudoVID_V_"#vti.LMul.MX) GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX) vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), + (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX#"_TU") + vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector undef), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask true_mask), + VLOpFrag)), (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX) vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd), + (vti.Vector vti.RegClass:$rs1), + GPR:$rs2, (vti.Mask true_mask), + VLOpFrag)), + (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX#"_TU") + vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; } foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ +; RUN: < %s | FileCheck %s + +declare @llvm.riscv.vslide1down.mask.nxv1i64.i64( + , + , + i64, + , + i32, + i32); + +define @intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, 
+ i32 %4, i32 0) + + ret %a +} + +define @intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4, i32 1) + + ret %a +} + + +; Fallback vslide1 to mask undisturbed untill InsertVSETVLI supports mask agnostic. +define @intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + i32 %4, i32 2) + + ret %a +} + +; Fallback vslide1 to mask undisturbed untill InsertVSETVLI supports mask agnostic. +define @intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.mask.nxv1i64.i64( + undef, + %0, + i64 %1, + %2, + i32 %3, i32 3) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll --- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=RV64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefix=RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefix=RV64 declare @llvm.riscv.vle.nxv1i8( , @@ -118,3 +118,2088 @@ ret %a } + +declare @llvm.riscv.vaadd.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vaadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vaadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vaadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vaadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vaadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vaadd.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vaaddu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define 
@intrinsic_vaaddu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vaaddu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vaaddu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vaaddu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vaaddu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vaaddu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vadd.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vadd_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vadd_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} +declare @llvm.riscv.vand.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vand.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vand_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vand.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vand.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vasub.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vasub_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vasub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vasub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vasub_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vasub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vasub.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vasubu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vasubu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vasubu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vasubu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vasubu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vasubu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vasubu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vdiv.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vdiv_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vdiv_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vdiv.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vdiv_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vdiv.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call 
@llvm.riscv.vdiv.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vdivu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vdivu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vdivu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vdivu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vdivu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vdivu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vdivu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfadd.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfdiv.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfdiv.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfdiv.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfdiv.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfmax.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfmax.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfmax.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfmin.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfmin.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfmin.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfmul.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfmul.vv v8, v9, 
v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfmul.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfrdiv.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfrdiv.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfrdiv.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfrdiv.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsgnj.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsgnj.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsgnjn.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsgnjn.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsgnjx.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsgnjx.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfrsub.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfrsub.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfrsub.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfrsub.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare 
@llvm.riscv.vfslide1down.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfslide1down.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfslide1down.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfslide1up.nxv1f16.f16( + , + , + half, + iXLen); + +define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfslide1up.vf v8, v9, fa0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfslide1up.vf v8, v9, fa0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfslide1up.nxv1f16.f16( + %0, + %1, + half %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwsub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwsub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwsub.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwsub.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( + , + , + , + iXLen); + +define @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vl4re16.v v24, (a0) +; RV32-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; RV32-NEXT: vfwsub.wv v8, v16, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vl4re16.v v24, (a0) +; RV64-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; RV64-NEXT: vfwsub.wv v8, v16, v24 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: 
intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwmul.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwadd.wv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwadd.wv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfwadd.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfwadd.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( + , + , + , + iXLen); + +define @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV32-NEXT: vfsub.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; RV64-NEXT: vfsub.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vfsub.nxv1f16.nxv1f16( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + + +declare @llvm.riscv.vslide1down.nxv1i64( + , + , + i64, + iXLen); + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, mu +; RV32-NEXT: vmv1r.v v10, v8 +; RV32-NEXT: vslide1down.vx v10, v9, a0 +; RV32-NEXT: vslide1down.vx v8, v10, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vslide1down.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vslide1up.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, mu +; RV32-NEXT: vmv1r.v v10, v8 +; RV32-NEXT: 
vslide1up.vx v10, v9, a1 +; RV32-NEXT: vslide1up.vx v8, v10, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vslide1up.vx v8, v9, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + %0, + %1, + i64 %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmax.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmax_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmax_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmax.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmax_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmax.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmax.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmaxu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmaxu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmaxu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmaxu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmaxu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmaxu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmaxu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmin.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmin_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmin_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmin.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmin_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmin.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmin.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vminu.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vminu_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vminu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vminu.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vminu_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vminu.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vminu.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmul.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define @intrinsic_vmul_vv_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmul_vv_nxv1i8_nxv1i8_nxv1i8: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV32-NEXT: vmul.vv v8, v9, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmul_vv_nxv1i8_nxv1i8_nxv1i8: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; RV64-NEXT: vmul.vv v8, v9, v10 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + %0, + %1, + %2, + iXLen %3) + + ret %a +} + +declare @llvm.riscv.vmulh.nxv1i8.nxv1i8( + , + , + , + iXLen); + +define 
+define <vscale x 1 x i8> @intrinsic_vmulh_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vmulh_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vmulh.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vmulh_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vmulh.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vmulh.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vmulhsu.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vmulhsu_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vmulhsu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vmulhsu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vmulhsu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vmulhsu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vmulhsu.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vmulhu.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vmulhu_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vmulhu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vmulhu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vmulhu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vmulhu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vmulhu.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i16>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vnclip.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vnclip_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vnclip.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i16>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vnclipu.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vnclipu_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vnclipu.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i16>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vnsra.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vnsra_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vnsra.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i16>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vnsrl.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vnsrl_wv_nxv1i8_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vnsrl.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vor.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vor.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vor_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vor.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vor.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vrem.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vrem_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vrem_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vrem.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vrem_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vrem.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vrem.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vremu.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+declare <vscale x 1 x i8> @llvm.riscv.vrgather.vv.nxv1i8.i32(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vrgather_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vrgather_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vrgather.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vrgather_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vrgather.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vrgather.vv.nxv1i8.i32(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vrgather.vx.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vrgather_vx_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vrgather_vx_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV32-NEXT:    vrgather.vx v8, v9, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vrgather_vx_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV64-NEXT:    vrgather.vx v8, v9, a0
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vrgather.vx.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vrgatherei16.vv.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i16>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vrgatherei16_vv_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i16> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vrgatherei16_vv_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vrgatherei16.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vrgatherei16_vv_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vrgatherei16.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vrgatherei16.vv.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i16> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vrsub.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
+; RV32-NEXT:    vsub.vv v8, v10, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
+; RV64-NEXT:    vrsub.vx v8, v9, a0
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vrsub.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vsadd.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
+; RV32-NEXT:    vsadd.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
+; RV64-NEXT:    vsadd.vx v8, v9, a0
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vsadd.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vsaddu.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vsaddu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsaddu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vsaddu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsaddu.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vsll_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsll_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vsll.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsll_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vsll.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vsmul.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vsmul.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsmul_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vsmul.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsmul.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
+; RV32-NEXT:    vsmul.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
+; RV64-NEXT:    vsmul.vx v8, v9, a0
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vsra.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vsra_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsra_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vsra.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsra_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vsra.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsra.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+declare <vscale x 1 x i8> @llvm.riscv.vsrl.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vsrl_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsrl_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vsrl.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsrl_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vsrl.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsrl.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vssra.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vssra_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vssra_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vssra.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vssra_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vssra.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vssra.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vssrl.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vssrl_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vssrl_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vssrl.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vssrl_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vssrl.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vssrl.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vssub.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vssub.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vssub_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vssub.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vssub.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vssubu.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vssubu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vssubu_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vssubu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vssubu.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vssub.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
+; RV32-NEXT:    vssub.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
+; RV64-NEXT:    vssub.vx v8, v9, a0
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vssub.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vssubu.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
+; RV32-NEXT:    vssubu.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
+; RV64-NEXT:    vssubu.vx v8, v9, a0
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vssubu.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vsub.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vsub_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vsub_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vsub.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vsub_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vsub.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsub.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwadd_vv_nxv1i16_nxv1i8_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwadd_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwadd.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwadd_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwadd.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwadd.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwadd.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwaddu_vv_nxv1i16_nxv1i8_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwaddu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwaddu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwaddu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwaddu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwmul_vv_nxv1i16_nxv1i8_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwmul_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwmul.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwmul_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwmul.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwmulu_vv_nxv1i16_nxv1i8_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwmulu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwmulu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwmulu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwmulu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwmulsu_vv_nxv1i16_nxv1i8_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwmulsu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwmulsu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwmulsu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwmulsu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwsub_vv_nxv1i16_nxv1i8_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwsub_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwsub.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwsub_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwsub.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwsub.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwsub.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8_tied(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind {
+; RV32-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8_tied:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwsub.wv v8, v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8_tied:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwsub.wv v8, v8, v9
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %0, <vscale x 1 x i8> %1, iXLen %2)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwsubu_vv_nxv1i16_nxv1i8_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwsubu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwsubu.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwsubu_vv_nxv1i16_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwsubu.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vwsubu.wv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vwsubu.wv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV32-NEXT:    vxor.vv v8, v9, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vxor_vv_nxv1i8_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; RV64-NEXT:    vxor.vv v8, v9, v10
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
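For reference, the sketch below shows how one of these unmasked, tail-undisturbed calls reads with the scalable vector types written out. It is illustrative only and not part of the patch: the function name is made up, vadd stands in for any of the binary intrinsics exercised above, and iXLen follows the same placeholder convention the tests use. The operand order matches the tests (passthru, op1, op2, vl), and passing a real passthru value rather than undef is what the checks above pair with a "tu" vsetvli policy.

declare <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,   ; passthru: tail elements are taken from here
  <vscale x 1 x i8>,   ; op1
  <vscale x 1 x i8>,   ; op2
  iXLen)               ; vl

; Illustrative sketch, not a test from this patch.
define <vscale x 1 x i8> @example_vadd_tu(<vscale x 1 x i8> %passthru, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b, iXLen %vl) {
entry:
  ; A non-undef %passthru is what the tests above exercise; the generated
  ; code keeps the tail elements, hence the "tu, mu" vsetvli in the checks.
  %r = call <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
    <vscale x 1 x i8> %passthru,
    <vscale x 1 x i8> %a,
    <vscale x 1 x i8> %b,
    iXLen %vl)
  ret <vscale x 1 x i8> %r
}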