diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -169,6 +169,13 @@ // This builtin has a granted vector length parameter in the last position. bit HasVL = true; + // Normally, intrinsics have the policy argument if it is masked and + // have no policy argument if it is unmasked. When HasPolicy is false, it + // means the intrinsic has no policy argument regardless masked or unmasked. + // For example, when the output result is mask type or scalar type, there is + // no need to specify the policy. + bit HasPolicy = true; + // This builtin supports non-masked function overloading api. // All masked operations support overloading api. bit HasNoMaskedOverloaded = true; @@ -400,6 +407,7 @@ class RVVMaskUnaryBuiltin : RVVOutBuiltin<"m", "mm", "c"> { let Name = NAME # "_m"; + let HasPolicy = false; } class RVVMaskNullaryBuiltin : RVVOutBuiltin<"m", "m", "c"> { @@ -551,6 +559,7 @@ } let HasNoMaskedOverloaded = false, + HasPolicy = false, ManualCodegen = [{ IntrinsicTypes = {ResultType, Ops[1]->getType()}; Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); @@ -566,6 +575,19 @@ let IRName = "vle1"; let HasMask = false; } +} + +let HasNoMaskedOverloaded = false, + ManualCodegen = [{ + IntrinsicTypes = {ResultType, Ops[1]->getType()}; + Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); + }], + ManualCodegenMask= [{ + // Move mask to right before vl. + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); + IntrinsicTypes = {ResultType, Ops[3]->getType()}; + Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); + }] in { multiclass RVVVLEBuiltin types> { let Name = NAME # "_v", IRName = "vle", @@ -605,7 +627,7 @@ ManualCodegenMask = [{ { // Move mask to right before vl. - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); IntrinsicTypes = {ResultType, Ops[4]->getType()}; Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); Value *NewVL = Ops[2]; @@ -642,7 +664,7 @@ }], ManualCodegenMask= [{ // Move mask to right before vl. - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); IntrinsicTypes = {ResultType, Ops[4]->getType()}; Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); }] in { @@ -662,7 +684,7 @@ }], ManualCodegenMask = [{ // Move mask to right before vl. - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()}; Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); }] in { @@ -682,6 +704,7 @@ } let HasMaskedOffOperand = false, + HasPolicy = false, ManualCodegen = [{ // Builtin: (ptr, value, vl). Intrinsic: (value, ptr, vl) std::swap(Ops[0], Ops[1]); @@ -699,6 +722,21 @@ let IRName = "vse1"; let HasMask = false; } +} + +let HasMaskedOffOperand = false, + ManualCodegen = [{ + // Builtin: (ptr, value, vl). Intrinsic: (value, ptr, vl) + std::swap(Ops[0], Ops[1]); + Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); + IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType()}; + }], + ManualCodegenMask= [{ + // Builtin: (mask, ptr, value, vl). 
Intrinsic: (value, ptr, mask, vl) + std::swap(Ops[0], Ops[2]); + Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); + IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()}; + }] in { multiclass RVVVSEBuiltin types> { let Name = NAME # "_v", IRName = "vse", @@ -779,6 +817,7 @@ IRName = NAME, IRNameMask = NAME # "_mask", HasMaskedOffOperand = false, + HasPolicy = false, ManualCodegen = [{ // base, bindex, value, vl IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()}; @@ -813,7 +852,7 @@ }], ManualCodegenMask = [{ { - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, cast(ResultType)->getElementType(), @@ -843,7 +882,7 @@ }], ManualCodegenMask = [{ { - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, cast(ResultType)->getElementType(), @@ -890,7 +929,7 @@ }], ManualCodegenMask = [{ { - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, Ops[1]->getType(), @@ -922,7 +961,7 @@ }], ManualCodegenMask = [{ { - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, Ops[1]->getType(), @@ -957,7 +996,7 @@ }], ManualCodegenMask = [{ { - std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, Ops[1]->getType(), @@ -978,6 +1017,7 @@ let HasVL = false, HasMask = false, HasSideEffects = true, + HasPolicy = false, Log2LMUL = [0], ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type { @@ -1138,7 +1178,7 @@ } // 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions -let HasMask = false in { +let HasMask = false, HasPolicy = false in { defm vadc : RVVCarryinBuiltinSet; defm vmadc : RVVCarryOutInBuiltinSet<"vmadc_carry_in">; defm vmadc : RVVIntMaskOutBuiltinSet; @@ -1166,6 +1206,7 @@ ["Uv", "UvUw"]]>; // 12.8. Vector Integer Comparison Instructions +let HasPolicy = false in { defm vmseq : RVVIntMaskOutBuiltinSet; defm vmsne : RVVIntMaskOutBuiltinSet; defm vmsltu : RVVUnsignedMaskOutBuiltinSet; @@ -1176,6 +1217,7 @@ defm vmsgt : RVVSignedMaskOutBuiltinSet; defm vmsgeu : RVVUnsignedMaskOutBuiltinSet; defm vmsge : RVVSignedMaskOutBuiltinSet; +} // 12.9. Vector Integer Min/Max Instructions defm vminu : RVVUnsignedBinBuiltinSet; @@ -1211,6 +1253,7 @@ } // 12.13. Vector Single-Width Integer Multiply-Add Instructions +let HasPolicy = false in { defm vmacc : RVVIntTerBuiltinSet; defm vnmsac : RVVIntTerBuiltinSet; defm vmadd : RVVIntTerBuiltinSet; @@ -1231,10 +1274,11 @@ defm vwmaccus : RVVOutOp1Op2BuiltinSet<"vwmaccus", "csi", [["vx", "w", "wwUev"]]>; } +} // 12.15. Vector Integer Merge Instructions // C/C++ Operand: (mask, op1, op2, vl), Intrinsic: (op1, op2, mask, vl) -let HasMask = false, +let HasMask = false, HasPolicy = false, ManualCodegen = [{ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3); IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()}; @@ -1247,7 +1291,7 @@ } // 12.16. 
Vector Integer Move Instructions -let HasMask = false in { +let HasMask = false, HasPolicy = false in { let MangledName = "vmv_v" in { defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "csil", [["v", "Uv", "UvUv"]]>; @@ -1311,6 +1355,7 @@ } // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +let HasPolicy = false in { defm vfmacc : RVVFloatingTerBuiltinSet; defm vfnmacc : RVVFloatingTerBuiltinSet; defm vfmsac : RVVFloatingTerBuiltinSet; @@ -1325,6 +1370,7 @@ defm vfwnmacc : RVVFloatingWidenTerBuiltinSet; defm vfwmsac : RVVFloatingWidenTerBuiltinSet; defm vfwnmsac : RVVFloatingWidenTerBuiltinSet; +} // 14.8. Vector Floating-Point Square-Root Instruction def vfsqrt : RVVFloatingUnaryVVBuiltin; @@ -1347,20 +1393,22 @@ defm vfabs_v : RVVPseudoVFUnaryBuiltin<"vfsgnjx", "fd">; // 14.13. Vector Floating-Point Compare Instructions +let HasPolicy = false in { defm vmfeq : RVVFloatingMaskOutBuiltinSet; defm vmfne : RVVFloatingMaskOutBuiltinSet; defm vmflt : RVVFloatingMaskOutBuiltinSet; defm vmfle : RVVFloatingMaskOutBuiltinSet; defm vmfgt : RVVFloatingMaskOutBuiltinSet; defm vmfge : RVVFloatingMaskOutBuiltinSet; +} // 14.14. Vector Floating-Point Classify Instruction -let Name = "vfclass_v" in +let Name = "vfclass_v", HasPolicy = false in def vfclass : RVVOp0Builtin<"Uv", "Uvv", "fd">; // 14.15. Vector Floating-Point Merge Instructio // C/C++ Operand: (mask, op1, op2, vl), Builtin: (op1, op2, mask, vl) -let HasMask = false, +let HasMask = false, HasPolicy = false, ManualCodegen = [{ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3); IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()}; @@ -1409,6 +1457,7 @@ // 15. Vector Reduction Operations // 15.1. Vector Single-Width Integer Reduction Instructions +let HasPolicy = false in { defm vredsum : RVVIntReductionBuiltinSet; defm vredmaxu : RVVUnsignedReductionBuiltin; defm vredmax : RVVSignedReductionBuiltin; @@ -1436,6 +1485,7 @@ // 15.4. Vector Widening Floating-Point Reduction Instructions defm vfwredsum : RVVFloatingWidenReductionBuiltin; defm vfwredosum : RVVFloatingWidenReductionBuiltin; +} // 16. Vector Mask Instructions // 16.1. Vector Mask-Register Logical Instructions @@ -1453,6 +1503,7 @@ defm vmmv_m : RVVPseudoMaskBuiltin<"vmand", "c">; defm vmnot_m : RVVPseudoMaskBuiltin<"vmnand", "c">; +let HasPolicy = false in { // 16.2. Vector mask population count vpopc def vpopc : RVVMaskOp0Builtin<"um">; @@ -1468,7 +1519,7 @@ // 16.6. vmsof.m set-only-first mask bit def vmsof : RVVMaskUnaryBuiltin; -let HasNoMaskedOverloaded = false in { +let HasNoMaskedOverloaded = false, HasPolicy = false in { // 16.8. Vector Iota Instruction defm viota : RVVOutBuiltinSet<"viota", "csil", [["m", "Uv", "Uvm"]]>; @@ -1476,10 +1527,11 @@ defm vid : RVVOutBuiltinSet<"vid", "csil", [["v", "v", "v"], ["v", "Uv", "Uv"]]>; } +} // 17. Vector Permutation Instructions // 17.1. Integer Scalar Move Instructions -let HasMask = false in { +let HasMask = false, HasPolicy = false in { let HasVL = false, MangledName = "vmv_x" in defm vmv_x : RVVOp0BuiltinSet<"vmv_x_s", "csil", [["s", "ve", "ev"], @@ -1491,7 +1543,7 @@ } // 17.2. Floating-Point Scalar Move Instructions -let HasMask = false in { +let HasMask = false, HasPolicy = false in { let HasVL = false, MangledName = "vfmv_f" in defm vfmv_f : RVVOp0BuiltinSet<"vfmv_f_s", "fd", [["s", "ve", "ev"]]>; @@ -1502,10 +1554,12 @@ } // 17.3. Vector Slide Instructions +let HasPolicy = false in { // 17.3.1. Vector Slideup Instructions defm vslideup : RVVSlideBuiltinSet; // 17.3.2. 
Vector Slidedown Instructions defm vslidedown : RVVSlideBuiltinSet; +} // 17.3.3. Vector Slide1up Instructions defm vslide1up : RVVSlideOneBuiltinSet; @@ -1532,7 +1586,7 @@ [["vv", "Uv", "UvUv(Log2EEW:4)Uv"]]>; // 17.5. Vector Compress Instruction -let HasMask = false, +let HasMask = false, HasPolicy = false, ManualCodegen = [{ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3); IntrinsicTypes = {ResultType, Ops[3]->getType()}; @@ -1548,7 +1602,7 @@ // Miscellaneous let HasMask = false, HasVL = false, HasNoMaskedOverloaded = false, IRName = "" in { - let Name = "vreinterpret_v", + let Name = "vreinterpret_v", HasPolicy = false, ManualCodegen = [{ return Builder.CreateBitCast(Ops[0], ResultType); }] in { @@ -1568,7 +1622,7 @@ } } - let Name = "vundefined", + let Name = "vundefined", HasPolicy = false, ManualCodegen = [{ return llvm::UndefValue::get(ResultType); }] in { @@ -1578,7 +1632,7 @@ // LMUL truncation // C/C++ Operand: VecTy, IR Operand: VecTy, Index - let Name = "vlmul_trunc_v", + let Name = "vlmul_trunc_v", HasPolicy = false, ManualCodegen = [{ { ID = Intrinsic::experimental_vector_extract; IntrinsicTypes = {ResultType, Ops[0]->getType()}; @@ -1594,7 +1648,7 @@ // LMUL extension // C/C++ Operand: SubVecTy, IR Operand: VecTy, SubVecTy, Index - let Name = "vlmul_ext_v", + let Name = "vlmul_ext_v", HasPolicy = false, ManualCodegen = [{ ID = Intrinsic::experimental_vector_insert; IntrinsicTypes = {ResultType, Ops[0]->getType()}; @@ -1610,7 +1664,7 @@ } } - let Name = "vget_v", + let Name = "vget_v", HasPolicy = false, ManualCodegen = [{ { ID = Intrinsic::experimental_vector_extract; @@ -1628,7 +1682,7 @@ } } - let Name = "vset_v", Log2LMUL = [0, 1, 2], + let Name = "vset_v", Log2LMUL = [0, 1, 2], HasPolicy = false, ManualCodegen = [{ { ID = Intrinsic::experimental_vector_insert; @@ -1647,3 +1701,14 @@ } } } + +class RVVHeader +{ + code HeaderCode; +} + +let HeaderCode = [{ +#define VE_TAIL_UNDISTURBED 0 +#define VE_TAIL_AGNOSTIC 1 +}] in +def policy : RVVHeader; diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c @@ -892,7 +892,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8mf8_t test_vadd_vv_i8mf8_m(vbool64_t mask, vint8mf8_t maskedoff, vint8mf8_t op1, vint8mf8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -902,7 +902,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8mf8_t test_vadd_vx_i8mf8_m(vbool64_t mask, vint8mf8_t maskedoff, vint8mf8_t op1, int8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -912,7 +912,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8mf4_t test_vadd_vv_i8mf4_m(vbool32_t mask, vint8mf4_t maskedoff, vint8mf4_t op1, vint8mf4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -922,7 +922,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8mf4_t test_vadd_vx_i8mf4_m(vbool32_t mask, vint8mf4_t maskedoff, vint8mf4_t op1, int8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -932,7 +932,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8mf2_t test_vadd_vv_i8mf2_m(vbool16_t mask, vint8mf2_t maskedoff, vint8mf2_t op1, 
vint8mf2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -942,7 +942,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8mf2_t test_vadd_vx_i8mf2_m(vbool16_t mask, vint8mf2_t maskedoff, vint8mf2_t op1, int8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -952,7 +952,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m1_t test_vadd_vv_i8m1_m(vbool8_t mask, vint8m1_t maskedoff, vint8m1_t op1, vint8m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -962,7 +962,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m1_t test_vadd_vx_i8m1_m(vbool8_t mask, vint8m1_t maskedoff, vint8m1_t op1, int8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -972,7 +972,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m2_t test_vadd_vv_i8m2_m(vbool4_t mask, vint8m2_t maskedoff, vint8m2_t op1, vint8m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -982,7 +982,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m2_t test_vadd_vx_i8m2_m(vbool4_t mask, vint8m2_t maskedoff, vint8m2_t op1, int8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -992,7 +992,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m4_t test_vadd_vv_i8m4_m(vbool2_t mask, vint8m4_t maskedoff, vint8m4_t op1, vint8m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1002,7 +1002,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m4_t test_vadd_vx_i8m4_m(vbool2_t mask, vint8m4_t maskedoff, vint8m4_t op1, int8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1012,7 +1012,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m8_t test_vadd_vv_i8m8_m(vbool1_t mask, vint8m8_t maskedoff, vint8m8_t op1, vint8m8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1022,7 +1022,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint8m8_t test_vadd_vx_i8m8_m(vbool1_t mask, vint8m8_t maskedoff, vint8m8_t op1, int8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1032,7 +1032,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vadd_vv_i16mf4_m(vbool64_t mask, vint16mf4_t maskedoff, vint16mf4_t op1, vint16mf4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1042,7 +1042,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vadd_vx_i16mf4_m(vbool64_t mask, vint16mf4_t maskedoff, vint16mf4_t op1, int16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1052,7 +1052,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vadd_vv_i16mf2_m(vbool32_t mask, vint16mf2_t maskedoff, vint16mf2_t op1, vint16mf2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1062,7 +1062,7 
@@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vadd_vx_i16mf2_m(vbool32_t mask, vint16mf2_t maskedoff, vint16mf2_t op1, int16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1072,7 +1072,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vadd_vv_i16m1_m(vbool16_t mask, vint16m1_t maskedoff, vint16m1_t op1, vint16m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1082,7 +1082,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vadd_vx_i16m1_m(vbool16_t mask, vint16m1_t maskedoff, vint16m1_t op1, int16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1092,7 +1092,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vadd_vv_i16m2_m(vbool8_t mask, vint16m2_t maskedoff, vint16m2_t op1, vint16m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1102,7 +1102,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vadd_vx_i16m2_m(vbool8_t mask, vint16m2_t maskedoff, vint16m2_t op1, int16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1112,7 +1112,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vadd_vv_i16m4_m(vbool4_t mask, vint16m4_t maskedoff, vint16m4_t op1, vint16m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1122,7 +1122,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vadd_vx_i16m4_m(vbool4_t mask, vint16m4_t maskedoff, vint16m4_t op1, int16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1132,7 +1132,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vadd_vv_i16m8_m(vbool2_t mask, vint16m8_t maskedoff, vint16m8_t op1, vint16m8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1142,7 +1142,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vadd_vx_i16m8_m(vbool2_t mask, vint16m8_t maskedoff, vint16m8_t op1, int16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1152,7 +1152,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vadd_vv_i32mf2_m(vbool64_t mask, vint32mf2_t maskedoff, vint32mf2_t op1, vint32mf2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1162,7 +1162,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vadd_vx_i32mf2_m(vbool64_t mask, vint32mf2_t maskedoff, vint32mf2_t op1, int32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1172,7 +1172,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vadd_vv_i32m1_m(vbool32_t mask, vint32m1_t maskedoff, vint32m1_t op1, vint32m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1182,7 +1182,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vadd_vx_i32m1_m(vbool32_t mask, vint32m1_t maskedoff, 
vint32m1_t op1, int32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1192,7 +1192,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vadd_vv_i32m2_m(vbool16_t mask, vint32m2_t maskedoff, vint32m2_t op1, vint32m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1202,7 +1202,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vadd_vx_i32m2_m(vbool16_t mask, vint32m2_t maskedoff, vint32m2_t op1, int32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1212,7 +1212,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vadd_vv_i32m4_m(vbool8_t mask, vint32m4_t maskedoff, vint32m4_t op1, vint32m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1222,7 +1222,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vadd_vx_i32m4_m(vbool8_t mask, vint32m4_t maskedoff, vint32m4_t op1, int32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1232,7 +1232,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vadd_vv_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, vint32m8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1242,7 +1242,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vadd_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, int32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1252,7 +1252,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vadd_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1262,7 +1262,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vadd_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1272,7 +1272,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vadd_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1282,7 +1282,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vadd_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1292,7 +1292,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vadd_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1302,7 +1302,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vadd_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, 
op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1312,7 +1312,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vadd_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1322,7 +1322,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vadd_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1332,7 +1332,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8mf8_t test_vadd_vv_u8mf8_m(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t op1, vuint8mf8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1342,7 +1342,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8mf8_t test_vadd_vx_u8mf8_m(vbool64_t mask, vuint8mf8_t maskedoff, vuint8mf8_t op1, uint8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1352,7 +1352,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8mf4_t test_vadd_vv_u8mf4_m(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t op1, vuint8mf4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1362,7 +1362,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8mf4_t test_vadd_vx_u8mf4_m(vbool32_t mask, vuint8mf4_t maskedoff, vuint8mf4_t op1, uint8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1372,7 +1372,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8mf2_t test_vadd_vv_u8mf2_m(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t op1, vuint8mf2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1382,7 +1382,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8mf2_t test_vadd_vx_u8mf2_m(vbool16_t mask, vuint8mf2_t maskedoff, vuint8mf2_t op1, uint8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1392,7 +1392,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8m1_t test_vadd_vv_u8m1_m(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t op1, vuint8m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1402,7 +1402,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8m1_t test_vadd_vx_u8m1_m(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t op1, uint8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1412,7 +1412,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8m2_t test_vadd_vv_u8m2_m(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t op1, vuint8m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1422,7 +1422,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8m2_t test_vadd_vx_u8m2_m(vbool4_t mask, vuint8m2_t maskedoff, vuint8m2_t op1, uint8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1432,7 +1432,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // 
vuint8m4_t test_vadd_vv_u8m4_m(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t op1, vuint8m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1442,7 +1442,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8m4_t test_vadd_vx_u8m4_m(vbool2_t mask, vuint8m4_t maskedoff, vuint8m4_t op1, uint8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1452,7 +1452,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8m8_t test_vadd_vv_u8m8_m(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t op1, vuint8m8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1462,7 +1462,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint8m8_t test_vadd_vx_u8m8_m(vbool1_t mask, vuint8m8_t maskedoff, vuint8m8_t op1, uint8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1472,7 +1472,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vadd_vv_u16mf4_m(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t op1, vuint16mf4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1482,7 +1482,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vadd_vx_u16mf4_m(vbool64_t mask, vuint16mf4_t maskedoff, vuint16mf4_t op1, uint16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1492,7 +1492,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vadd_vv_u16mf2_m(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t op1, vuint16mf2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1502,7 +1502,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vadd_vx_u16mf2_m(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t op1, uint16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1512,7 +1512,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vadd_vv_u16m1_m(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t op1, vuint16m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1522,7 +1522,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vadd_vx_u16m1_m(vbool16_t mask, vuint16m1_t maskedoff, vuint16m1_t op1, uint16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1532,7 +1532,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vadd_vv_u16m2_m(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t op1, vuint16m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1542,7 +1542,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vadd_vx_u16m2_m(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t op1, uint16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1552,7 +1552,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vadd_vv_u16m4_m(vbool4_t mask, vuint16m4_t maskedoff, 
vuint16m4_t op1, vuint16m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1562,7 +1562,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vadd_vx_u16m4_m(vbool4_t mask, vuint16m4_t maskedoff, vuint16m4_t op1, uint16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1572,7 +1572,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vadd_vv_u16m8_m(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t op1, vuint16m8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1582,7 +1582,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vadd_vx_u16m8_m(vbool2_t mask, vuint16m8_t maskedoff, vuint16m8_t op1, uint16_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1592,7 +1592,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vadd_vv_u32mf2_m(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t op1, vuint32mf2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1602,7 +1602,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vadd_vx_u32mf2_m(vbool64_t mask, vuint32mf2_t maskedoff, vuint32mf2_t op1, uint32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1612,7 +1612,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vadd_vv_u32m1_m(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t op1, vuint32m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1622,7 +1622,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vadd_vx_u32m1_m(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t op1, uint32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1632,7 +1632,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vadd_vv_u32m2_m(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t op1, vuint32m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1642,7 +1642,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vadd_vx_u32m2_m(vbool16_t mask, vuint32m2_t maskedoff, vuint32m2_t op1, uint32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1652,7 +1652,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vadd_vv_u32m4_m(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t op1, vuint32m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1662,7 +1662,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vadd_vx_u32m4_m(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t op1, uint32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1672,7 +1672,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vadd_vv_u32m8_m(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t op1, vuint32m8_t op2, size_t vl) { - return 
vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1682,7 +1682,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vadd_vx_u32m8_m(vbool4_t mask, vuint32m8_t maskedoff, vuint32m8_t op1, uint32_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1692,7 +1692,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vadd_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1702,7 +1702,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vadd_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1712,7 +1712,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vadd_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1722,7 +1722,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vadd_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1732,7 +1732,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vadd_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1742,7 +1742,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vadd_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1752,7 +1752,7 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vadd_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } // @@ -1762,6 +1762,6 @@ // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vadd_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) { - return vadd(mask, maskedoff, op1, op2, vl); + return vadd(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c @@ -13,7 +13,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint8mf8_t test_vle8_v_i8mf8_m(vbool64_t mask, vint8mf8_t maskedoff, const int8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -24,7 +24,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint8mf4_t test_vle8_v_i8mf4_m(vbool32_t mask, vint8mf4_t maskedoff, const int8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -35,7 +35,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // 
vint8mf2_t test_vle8_v_i8mf2_m(vbool16_t mask, vint8mf2_t maskedoff, const int8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -46,7 +46,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint8m1_t test_vle8_v_i8m1_m(vbool8_t mask, vint8m1_t maskedoff, const int8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -57,7 +57,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint8m2_t test_vle8_v_i8m2_m(vbool4_t mask, vint8m2_t maskedoff, const int8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -68,7 +68,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint8m4_t test_vle8_v_i8m4_m(vbool2_t mask, vint8m4_t maskedoff, const int8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -79,7 +79,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint8m8_t test_vle8_v_i8m8_m(vbool1_t mask, vint8m8_t maskedoff, const int8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -90,7 +90,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint16mf4_t test_vle16_v_i16mf4_m(vbool64_t mask, vint16mf4_t maskedoff, const int16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -101,7 +101,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint16mf2_t test_vle16_v_i16mf2_m(vbool32_t mask, vint16mf2_t maskedoff, const int16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -112,7 +112,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint16m1_t test_vle16_v_i16m1_m(vbool16_t mask, vint16m1_t maskedoff, const int16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -123,7 +123,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint16m2_t test_vle16_v_i16m2_m(vbool8_t mask, vint16m2_t maskedoff, const int16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -134,7 +134,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint16m4_t test_vle16_v_i16m4_m(vbool4_t mask, vint16m4_t maskedoff, const int16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -145,7 +145,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint16m8_t test_vle16_v_i16m8_m(vbool2_t mask, vint16m8_t maskedoff, const int16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -156,7 +156,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint32mf2_t test_vle32_v_i32mf2_m(vbool64_t mask, vint32mf2_t maskedoff, const int32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -167,7 +167,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint32m1_t test_vle32_v_i32m1_m(vbool32_t mask, vint32m1_t maskedoff, const int32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -178,7 +178,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint32m2_t test_vle32_v_i32m2_m(vbool16_t mask, vint32m2_t maskedoff, const int32_t *base, size_t 
vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -189,7 +189,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint32m4_t test_vle32_v_i32m4_m(vbool8_t mask, vint32m4_t maskedoff, const int32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -200,7 +200,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint32m8_t test_vle32_v_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, const int32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -211,7 +211,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint64m1_t test_vle64_v_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, const int64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -222,7 +222,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint64m2_t test_vle64_v_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, const int64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -233,7 +233,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint64m4_t test_vle64_v_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, const int64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -244,7 +244,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vint64m8_t test_vle64_v_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, const int64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -255,7 +255,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint8mf8_t test_vle8_v_u8mf8_m(vbool64_t mask, vuint8mf8_t maskedoff, const uint8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -266,7 +266,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint8mf4_t test_vle8_v_u8mf4_m(vbool32_t mask, vuint8mf4_t maskedoff, const uint8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -277,7 +277,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint8mf2_t test_vle8_v_u8mf2_m(vbool16_t mask, vuint8mf2_t maskedoff, const uint8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -288,7 +288,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint8m1_t test_vle8_v_u8m1_m(vbool8_t mask, vuint8m1_t maskedoff, const uint8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -299,7 +299,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint8m2_t test_vle8_v_u8m2_m(vbool4_t mask, vuint8m2_t maskedoff, const uint8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -310,7 +310,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint8m4_t test_vle8_v_u8m4_m(vbool2_t mask, vuint8m4_t maskedoff, const uint8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -321,7 +321,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint8m8_t test_vle8_v_u8m8_m(vbool1_t mask, vuint8m8_t maskedoff, const uint8_t *base, size_t vl) { - return vle8(mask, maskedoff, base, vl); + return vle8(mask, maskedoff, 
base, vl, VE_TAIL_AGNOSTIC); } // @@ -332,7 +332,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint16mf4_t test_vle16_v_u16mf4_m(vbool64_t mask, vuint16mf4_t maskedoff, const uint16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -343,7 +343,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint16mf2_t test_vle16_v_u16mf2_m(vbool32_t mask, vuint16mf2_t maskedoff, const uint16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -354,7 +354,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint16m1_t test_vle16_v_u16m1_m(vbool16_t mask, vuint16m1_t maskedoff, const uint16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -365,7 +365,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint16m2_t test_vle16_v_u16m2_m(vbool8_t mask, vuint16m2_t maskedoff, const uint16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -376,7 +376,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint16m4_t test_vle16_v_u16m4_m(vbool4_t mask, vuint16m4_t maskedoff, const uint16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -387,7 +387,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint16m8_t test_vle16_v_u16m8_m(vbool2_t mask, vuint16m8_t maskedoff, const uint16_t *base, size_t vl) { - return vle16(mask, maskedoff, base, vl); + return vle16(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -398,7 +398,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint32mf2_t test_vle32_v_u32mf2_m(vbool64_t mask, vuint32mf2_t maskedoff, const uint32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -409,7 +409,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint32m1_t test_vle32_v_u32m1_m(vbool32_t mask, vuint32m1_t maskedoff, const uint32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -420,7 +420,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint32m2_t test_vle32_v_u32m2_m(vbool16_t mask, vuint32m2_t maskedoff, const uint32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -431,7 +431,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint32m4_t test_vle32_v_u32m4_m(vbool8_t mask, vuint32m4_t maskedoff, const uint32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -442,7 +442,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint32m8_t test_vle32_v_u32m8_m(vbool4_t mask, vuint32m8_t maskedoff, const uint32_t *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -453,7 +453,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint64m1_t test_vle64_v_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, const uint64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -464,7 +464,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint64m2_t test_vle64_v_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, const uint64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, 
VE_TAIL_AGNOSTIC); } // @@ -475,7 +475,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint64m4_t test_vle64_v_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, const uint64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -486,7 +486,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vuint64m8_t test_vle64_v_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, const uint64_t *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -497,7 +497,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat32mf2_t test_vle32_v_f32mf2_m(vbool64_t mask, vfloat32mf2_t maskedoff, const float *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -508,7 +508,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat32m1_t test_vle32_v_f32m1_m(vbool32_t mask, vfloat32m1_t maskedoff, const float *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -519,7 +519,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat32m2_t test_vle32_v_f32m2_m(vbool16_t mask, vfloat32m2_t maskedoff, const float *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -530,7 +530,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat32m4_t test_vle32_v_f32m4_m(vbool8_t mask, vfloat32m4_t maskedoff, const float *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -541,7 +541,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat32m8_t test_vle32_v_f32m8_m(vbool4_t mask, vfloat32m8_t maskedoff, const float *base, size_t vl) { - return vle32(mask, maskedoff, base, vl); + return vle32(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -552,7 +552,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat64m1_t test_vle64_v_f64m1_m(vbool64_t mask, vfloat64m1_t maskedoff, const double *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -563,7 +563,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat64m2_t test_vle64_v_f64m2_m(vbool32_t mask, vfloat64m2_t maskedoff, const double *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -574,7 +574,7 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat64m4_t test_vle64_v_f64m4_m(vbool16_t mask, vfloat64m4_t maskedoff, const double *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } // @@ -585,5 +585,5 @@ // CHECK-RV64-NEXT: ret [[TMP1]] // vfloat64m8_t test_vle64_v_f64m8_m(vbool8_t mask, vfloat64m8_t maskedoff, const double *base, size_t vl) { - return vle64(mask, maskedoff, base, vl); + return vle64(mask, maskedoff, base, vl, VE_TAIL_AGNOSTIC); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c @@ -596,7 +596,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_i8mf8_m(vbool64_t mask, int8_t *base, vint8mf8_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -607,7 +607,7 @@ // CHECK-RV64-NEXT: ret void // 
void test_vse8_v_i8mf4_m(vbool32_t mask, int8_t *base, vint8mf4_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -618,7 +618,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_i8mf2_m(vbool16_t mask, int8_t *base, vint8mf2_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -629,7 +629,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_i8m1_m(vbool8_t mask, int8_t *base, vint8m1_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -640,7 +640,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_i8m2_m(vbool4_t mask, int8_t *base, vint8m2_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -651,7 +651,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_i8m4_m(vbool2_t mask, int8_t *base, vint8m4_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -662,7 +662,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_i8m8_m(vbool1_t mask, int8_t *base, vint8m8_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -673,7 +673,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_i16mf4_m(vbool64_t mask, int16_t *base, vint16mf4_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -684,7 +684,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_i16mf2_m(vbool32_t mask, int16_t *base, vint16mf2_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -695,7 +695,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_i16m1_m(vbool16_t mask, int16_t *base, vint16m1_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -706,7 +706,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_i16m2_m(vbool8_t mask, int16_t *base, vint16m2_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -717,7 +717,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_i16m4_m(vbool4_t mask, int16_t *base, vint16m4_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -728,7 +728,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_i16m8_m(vbool2_t mask, int16_t *base, vint16m8_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -739,7 +739,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_i32mf2_m(vbool64_t mask, int32_t *base, vint32mf2_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -750,7 +750,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_i32m1_m(vbool32_t mask, int32_t *base, vint32m1_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -761,7 +761,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_i32m2_m(vbool16_t mask, int32_t *base, vint32m2_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -772,7 +772,7 @@ // 
CHECK-RV64-NEXT: ret void // void test_vse32_v_i32m4_m(vbool8_t mask, int32_t *base, vint32m4_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -783,7 +783,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_i32m8_m(vbool4_t mask, int32_t *base, vint32m8_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -794,7 +794,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_i64m1_m(vbool64_t mask, int64_t *base, vint64m1_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -805,7 +805,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_i64m2_m(vbool32_t mask, int64_t *base, vint64m2_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -816,7 +816,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_i64m4_m(vbool16_t mask, int64_t *base, vint64m4_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -827,7 +827,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_i64m8_m(vbool8_t mask, int64_t *base, vint64m8_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -838,7 +838,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_u8mf8_m(vbool64_t mask, uint8_t *base, vuint8mf8_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -849,7 +849,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_u8mf4_m(vbool32_t mask, uint8_t *base, vuint8mf4_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -860,7 +860,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_u8mf2_m(vbool16_t mask, uint8_t *base, vuint8mf2_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -871,7 +871,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_u8m1_m(vbool8_t mask, uint8_t *base, vuint8m1_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -882,7 +882,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_u8m2_m(vbool4_t mask, uint8_t *base, vuint8m2_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -893,7 +893,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_u8m4_m(vbool2_t mask, uint8_t *base, vuint8m4_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -904,7 +904,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse8_v_u8m8_m(vbool1_t mask, uint8_t *base, vuint8m8_t value, size_t vl) { - return vse8(mask, base, value, vl); + return vse8(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -915,7 +915,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_u16mf4_m(vbool64_t mask, uint16_t *base, vuint16mf4_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -926,7 +926,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_u16mf2_m(vbool32_t mask, uint16_t *base, vuint16mf2_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, 
VE_TAIL_AGNOSTIC); } // @@ -937,7 +937,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_u16m1_m(vbool16_t mask, uint16_t *base, vuint16m1_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -948,7 +948,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_u16m2_m(vbool8_t mask, uint16_t *base, vuint16m2_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -959,7 +959,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_u16m4_m(vbool4_t mask, uint16_t *base, vuint16m4_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -970,7 +970,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse16_v_u16m8_m(vbool2_t mask, uint16_t *base, vuint16m8_t value, size_t vl) { - return vse16(mask, base, value, vl); + return vse16(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -981,7 +981,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_u32mf2_m(vbool64_t mask, uint32_t *base, vuint32mf2_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -992,7 +992,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_u32m1_m(vbool32_t mask, uint32_t *base, vuint32m1_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1003,7 +1003,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_u32m2_m(vbool16_t mask, uint32_t *base, vuint32m2_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1014,7 +1014,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_u32m4_m(vbool8_t mask, uint32_t *base, vuint32m4_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1025,7 +1025,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_u32m8_m(vbool4_t mask, uint32_t *base, vuint32m8_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1036,7 +1036,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_u64m1_m(vbool64_t mask, uint64_t *base, vuint64m1_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1047,7 +1047,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_u64m2_m(vbool32_t mask, uint64_t *base, vuint64m2_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1058,7 +1058,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_u64m4_m(vbool16_t mask, uint64_t *base, vuint64m4_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1069,7 +1069,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_u64m8_m(vbool8_t mask, uint64_t *base, vuint64m8_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1080,7 +1080,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_f32mf2_m(vbool64_t mask, float *base, vfloat32mf2_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1091,7 +1091,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_f32m1_m(vbool32_t mask, float *base, 
vfloat32m1_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1102,7 +1102,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_f32m2_m(vbool16_t mask, float *base, vfloat32m2_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1113,7 +1113,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_f32m4_m(vbool8_t mask, float *base, vfloat32m4_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1124,7 +1124,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse32_v_f32m8_m(vbool4_t mask, float *base, vfloat32m8_t value, size_t vl) { - return vse32(mask, base, value, vl); + return vse32(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1135,7 +1135,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_f64m1_m(vbool64_t mask, double *base, vfloat64m1_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1146,7 +1146,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_f64m2_m(vbool32_t mask, double *base, vfloat64m2_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1157,7 +1157,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_f64m4_m(vbool16_t mask, double *base, vfloat64m4_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // @@ -1168,7 +1168,7 @@ // CHECK-RV64-NEXT: ret void // void test_vse64_v_f64m8_m(vbool8_t mask, double *base, vfloat64m8_t value, size_t vl) { - return vse64(mask, base, value, vl); + return vse64(mask, base, value, vl, VE_TAIL_AGNOSTIC); } // diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd-policy.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd-policy.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd-policy.c @@ -0,0 +1,44 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \ +// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg \ +// RUN: | FileCheck --check-prefix=CHECK-RV64 %s + +#include <riscv_vector.h> + + +// CHECK-RV64-LABEL: @test_vadd_vv_i8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[OP1:%.*]], <vscale x 8 x i8> [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret <vscale x 8 x i8> [[TMP0]] +// +vint8m1_t test_vadd_vv_i8m1 (vint8m1_t op1, vint8m1_t op2, size_t vl) { + return vadd_vv_i8m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vadd_vv_i8m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vadd.mask.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[OP1:%.*]], <vscale x 8 x i8> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1) +// CHECK-RV64-NEXT: ret <vscale x 8 x i8> [[TMP0]] +// +vint8m1_t test_vadd_vv_i8m1_m (vbool8_t mask, vint8m1_t maskedoff, vint8m1_t op1, vint8m1_t op2, size_t vl) { + return vadd_vv_i8m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vadd_tu( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vadd.mask.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[OP1:%.*]], <vscale x 8 x i8> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret <vscale x 8 x i8> [[TMP0]] +// +vint8m1_t test_vadd_tu (vbool8_t mask, vint8m1_t maskedoff, vint8m1_t op1, vint8m1_t op2, size_t vl) { +
return vadd_vv_i8m1_mt(mask, maskedoff, op1, op2, vl, VE_TAIL_UNDISTURBED); +} + +// CHECK-RV64-LABEL: @test_vadd_ta( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vadd.mask.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[MASKEDOFF:%.*]], <vscale x 8 x i8> [[OP1:%.*]], <vscale x 8 x i8> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1) +// CHECK-RV64-NEXT: ret <vscale x 8 x i8> [[TMP0]] +// +vint8m1_t test_vadd_ta (vbool8_t mask, vint8m1_t maskedoff, vint8m1_t op1, vint8m1_t op2, size_t vl) { + return vadd_vv_i8m1_mt(mask, maskedoff, op1, op2, vl, VE_TAIL_AGNOSTIC); +} diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -156,6 +156,7 @@ bool IsMask; bool HasMaskedOffOperand; bool HasVL; + bool HasPolicy; bool HasNoMaskedOverloaded; bool HasAutoDef; // There is automatic definition in header std::string ManualCodegen; @@ -169,8 +170,9 @@ public: RVVIntrinsic(StringRef Name, StringRef Suffix, StringRef MangledName, StringRef IRName, bool HasSideEffects, bool IsMask, - bool HasMaskedOffOperand, bool HasVL, bool HasNoMaskedOverloaded, - bool HasAutoDef, StringRef ManualCodegen, const RVVTypes &Types, + bool HasMaskedOffOperand, bool HasVL, bool HasPolicy, + bool HasNoMaskedOverloaded, bool HasAutoDef, + StringRef ManualCodegen, const RVVTypes &Types, const std::vector<int64_t> &IntrinsicTypes, StringRef RequiredExtension); ~RVVIntrinsic() = default; @@ -180,6 +182,7 @@ bool hasSideEffects() const { return HasSideEffects; } bool hasMaskedOffOperand() const { return HasMaskedOffOperand; } bool hasVL() const { return HasVL; } + bool hasPolicy() const { return HasPolicy; } bool hasNoMaskedOverloaded() const { return HasNoMaskedOverloaded; } bool hasManualCodegen() const { return !ManualCodegen.empty(); } bool hasAutoDef() const { return HasAutoDef; } @@ -195,6 +198,9 @@ // init the RVVIntrinsic ID and IntrinsicTypes. void emitCodeGenSwitchBody(raw_ostream &o) const; + // Emit the #define macros for mask intrinsics using _mt intrinsics. + void emitIntrinsicMaskMacro(raw_ostream &o) const; + // Emit the macros for mapping C/C++ intrinsic function to builtin functions. void emitIntrinsicMacro(raw_ostream &o) const; @@ -227,6 +233,8 @@ private: /// Create all intrinsics and add them to \p Out void createRVVIntrinsics(std::vector<std::unique_ptr<RVVIntrinsic>> &Out); + /// Create Headers and add them to \p OS + void createRVVHeaders(raw_ostream &OS); /// Compute output and input types by applying different config (basic type /// and LMUL with type transformers). It also record result of type in legal or illegal set to avoid compute the same config again.
The result maybe @@ -631,7 +639,7 @@ ScalarType = ScalarTypeKind::SignedLong; break; default: - PrintFatalError("Illegal primitive type transformers!"); + PrintFatalError("Illegal primitive type transformers: " + PType); } Transformer = Transformer.drop_back(); @@ -745,15 +753,15 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix, StringRef NewMangledName, StringRef IRName, bool HasSideEffects, bool IsMask, - bool HasMaskedOffOperand, bool HasVL, + bool HasMaskedOffOperand, bool HasVL, bool HasPolicy, bool HasNoMaskedOverloaded, bool HasAutoDef, StringRef ManualCodegen, const RVVTypes &OutInTypes, const std::vector &NewIntrinsicTypes, StringRef RequiredExtension) : IRName(IRName), HasSideEffects(HasSideEffects), IsMask(IsMask), HasMaskedOffOperand(HasMaskedOffOperand), HasVL(HasVL), - HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef), - ManualCodegen(ManualCodegen.str()) { + HasPolicy(HasPolicy), HasNoMaskedOverloaded(HasNoMaskedOverloaded), + HasAutoDef(HasAutoDef), ManualCodegen(ManualCodegen.str()) { // Init Name and MangledName Name = NewName.str(); @@ -765,6 +773,8 @@ Name += "_" + Suffix.str(); if (IsMask) { Name += "_m"; + if (HasPolicy) + Name += "t"; } // Init RISC-V extensions for (const auto &T : OutInTypes) { @@ -813,7 +823,10 @@ if (isMask()) { if (hasVL()) { - OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);\n"; + if (hasPolicy()) + OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 2);\n"; + else + OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);\n"; } else { OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());\n"; } @@ -853,6 +866,24 @@ OS << ")\n"; } +void RVVIntrinsic::emitIntrinsicMaskMacro(raw_ostream &OS) const { + OS << "#define " << getName().drop_back() << "("; + if (!InputTypes.empty()) { + ListSeparator LS; + for (unsigned i = 0, e = InputTypes.size() - 1; i != e; ++i) + OS << LS << "op" << i; + } + OS << ") \\\n"; + OS << "__builtin_rvv_" << getName() << "("; + ListSeparator LS; + if (!InputTypes.empty()) { + for (unsigned i = 0, e = InputTypes.size() - 1; i != e; ++i) + OS << LS << "(" << InputTypes[i]->getTypeStr() << ")(op" << i << ")"; + } + OS << LS << "(size_t)VE_TAIL_AGNOSTIC"; + OS << ")\n"; +} + void RVVIntrinsic::emitMangledFuncDef(raw_ostream &OS) const { OS << "__attribute__((clang_builtin_alias("; OS << "__builtin_rvv_" << getName() << ")))\n"; @@ -898,6 +929,8 @@ OS << "extern \"C\" {\n"; OS << "#endif\n\n"; + createRVVHeaders(OS); + std::vector> Defs; createRVVIntrinsics(Defs); @@ -965,6 +998,12 @@ Inst.emitIntrinsicMacro(OS); }); + // Use _mt to implement _m intrinsics. + emitArchMacroAndBody(Defs, OS, [](raw_ostream &OS, const RVVIntrinsic &Inst) { + if (Inst.isMask() && Inst.hasPolicy()) + Inst.emitIntrinsicMaskMacro(OS); + }); + OS << "#define __riscv_v_intrinsic_overloading 1\n"; // Print Overloaded APIs @@ -1066,6 +1105,7 @@ bool HasMask = R->getValueAsBit("HasMask"); bool HasMaskedOffOperand = R->getValueAsBit("HasMaskedOffOperand"); bool HasVL = R->getValueAsBit("HasVL"); + bool HasPolicy = R->getValueAsBit("HasPolicy"); bool HasNoMaskedOverloaded = R->getValueAsBit("HasNoMaskedOverloaded"); bool HasSideEffects = R->getValueAsBit("HasSideEffects"); std::vector Log2LMULList = R->getValueAsListOfInts("Log2LMUL"); @@ -1104,6 +1144,10 @@ ProtoMaskSeq.push_back("z"); } + if (HasPolicy) { + ProtoMaskSeq.push_back("Kz"); + } + // Create Intrinsics for each type and LMUL. 
for (char I : TypeRange) { for (int Log2LMUL : Log2LMULList) { @@ -1116,7 +1160,7 @@ // Create a non-mask intrinsic Out.push_back(std::make_unique( Name, SuffixStr, MangledName, IRName, HasSideEffects, - /*IsMask=*/false, /*HasMaskedOffOperand=*/false, HasVL, + /*IsMask=*/false, /*HasMaskedOffOperand=*/false, HasVL, HasPolicy, HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, Types.getValue(), IntrinsicTypes, RequiredExtension)); if (HasMask) { @@ -1125,7 +1169,7 @@ computeTypes(I, Log2LMUL, ProtoMaskSeq); Out.push_back(std::make_unique( Name, SuffixStr, MangledName, IRNameMask, HasSideEffects, - /*IsMask=*/true, HasMaskedOffOperand, HasVL, + /*IsMask=*/true, HasMaskedOffOperand, HasVL, HasPolicy, HasNoMaskedOverloaded, HasAutoDef, ManualCodegenMask, MaskTypes.getValue(), IntrinsicTypes, RequiredExtension)); } @@ -1134,6 +1178,15 @@ } } +void RVVEmitter::createRVVHeaders(raw_ostream &OS) { + std::vector RVVHeaders = + Records.getAllDerivedDefinitions("RVVHeader"); + for (auto *R : RVVHeaders) { + StringRef HeaderCodeStr = R->getValueAsString("HeaderCode"); + OS << HeaderCodeStr.str(); + } +} + Optional RVVEmitter::computeTypes(BasicType BT, int Log2LMUL, ArrayRef PrototypeSeq) { diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -165,7 +165,7 @@ [LLVMMatchType<0>, LLVMPointerType>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyint_ty], + llvm_anyint_ty, LLVMMatchType<1>], [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; // For unit stride fault-only-first load with mask // Input: (maskedoff, pointer, mask, vl) @@ -177,7 +177,7 @@ [LLVMMatchType<0>, LLVMPointerType>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<1>], + LLVMMatchType<1>, LLVMMatchType<1>], [NoCapture>]>, RISCVVIntrinsic; // For strided load // Input: (pointer, stride, vl) @@ -192,7 +192,8 @@ : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType>, llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>, + LLVMMatchType<1>], [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; // For indexed load // Input: (pointer, index, vl) @@ -207,7 +208,8 @@ : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType>, llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; // For unit stride store // Input: (vector_in, pointer, vl) @@ -224,7 +226,7 @@ [llvm_anyvector_ty, LLVMPointerType>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyint_ty], + llvm_anyint_ty, LLVMMatchType<1>], [NoCapture>, IntrWriteMem]>, RISCVVIntrinsic; // For strided store // Input: (vector_in, pointer, stride, vl) @@ -240,7 +242,8 @@ : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerType>, llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>, + LLVMMatchType<1>], [NoCapture>, IntrWriteMem]>, RISCVVIntrinsic; // For indexed store // Input: (vector_in, pointer, index, vl) @@ -256,7 +259,8 @@ : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerType>, llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], [NoCapture>, IntrWriteMem]>, RISCVVIntrinsic; // For 
destination vector type is the same as source vector. // Input: (vector_in, vl) @@ -267,6 +271,12 @@ // For destination vector type is the same as first source vector (with mask). // Input: (vector_in, mask, vl) class RISCVUnaryAAMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<1>], + [IntrNoMem]>, RISCVVIntrinsic; + class RISCVUnaryAAMaskNoTA : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], @@ -288,7 +298,8 @@ class RISCVRGatherVVMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic; // Input: (vector_in, int16_vector_in, vl) class RISCVRGatherEI16VVNoMask @@ -302,7 +313,8 @@ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first source vector, and the // second operand is XLen. @@ -318,7 +330,8 @@ class RISCVGatherVXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>, + LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic { } // For destination vector type is the same as first source vector. @@ -334,7 +347,8 @@ class RISCVBinaryAAXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], [IntrNoMem]>, RISCVVIntrinsic { let SplatOperand = 3; } @@ -351,7 +365,8 @@ class RISCVBinaryAAShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is NOT the same as first source vector. // Input: (vector_in, vector_in/scalar_in, vl) @@ -366,7 +381,8 @@ class RISCVBinaryABXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<3>], [IntrNoMem]>, RISCVVIntrinsic { let SplatOperand = 3; } @@ -383,7 +399,8 @@ class RISCVBinaryABShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<3>], [IntrNoMem]>, RISCVVIntrinsic; // For binary operations with V0 as input. 
// Input: (vector_in, vector_in/scalar_in, V0, vl) @@ -465,7 +482,8 @@ class RISCVSaturatingBinaryAAXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { let SplatOperand = 3; } @@ -484,7 +502,8 @@ class RISCVSaturatingBinaryAAShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; // For Saturating binary operations. // The destination vector type is NOT the same as first source vector. @@ -501,7 +520,8 @@ class RISCVSaturatingBinaryABShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<3>], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; class RISCVTernaryAAAXNoMask : Intrinsic<[llvm_anyvector_ty], @@ -584,7 +604,7 @@ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, - llvm_anyint_ty], + llvm_anyint_ty, LLVMMatchType<2>], [IntrNoMem]>, RISCVVIntrinsic; // For unary operations with the same vector type in/out without mask // Output: (vector) @@ -618,7 +638,8 @@ class RISCVConversionMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], [IntrNoMem]>, RISCVVIntrinsic; // For atomic operations without mask // Input: (base, index, value, vl) @@ -1124,7 +1145,7 @@ defm vrgather_vx : RISCVRGatherVX; defm vrgatherei16_vv : RISCVRGatherEI16VV; - def "int_riscv_vcompress" : RISCVUnaryAAMask; + def "int_riscv_vcompress" : RISCVUnaryAAMaskNoTA; defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -76,6 +76,9 @@ // explicit operand. Used by RVV Pseudos. HasVLOpShift = HasSEWOpShift + 1, HasVLOpMask = 1 << HasVLOpShift, + + HasPolicyOpShift = HasVLOpShift + 1, + HasPolicyOpMask = 1 << HasPolicyOpShift, }; // Match with the definitions in RISCVInstrFormatsV.td @@ -132,6 +135,10 @@ return TSFlags & HasVLOpMask; } +static inline bool hasPolicyOp(uint64_t TSFlags) { + return TSFlags & HasPolicyOpMask; +} + // RISC-V Specific Machine Operand Flags enum { MO_None = 0, diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -365,7 +365,9 @@ RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); - unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm(); + unsigned Log2SEWIndex = + RISCVII::hasPolicyOp(TSFlags) ? NumOperands - 2 : NumOperands - 1; + unsigned Log2SEW = MI.getOperand(Log2SEWIndex).getImm(); // A Log2SEW of 0 is an operation on mask registers only. bool MaskRegOp = Log2SEW == 0; unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; @@ -393,8 +395,18 @@ } } + // If the instruction has policy argument, use the argument. + if (RISCVII::hasPolicyOp(TSFlags)) { + const MachineOperand &Op = MI.getOperand(NumOperands - 1); + TailAgnostic = Op.getImm() & 0x1; + } + if (RISCVII::hasVLOp(TSFlags)) { - const MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2); + unsigned Offset = 2; + if (RISCVII::hasPolicyOp(TSFlags)) + Offset = 3; + const MachineOperand &VLOp = + MI.getOperand(MI.getNumExplicitOperands() - Offset); if (VLOp.isImm()) InstrInfo.setAVLImm(VLOp.getImm()); else @@ -644,7 +656,11 @@ if (RISCVII::hasSEWOp(TSFlags)) { VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); if (RISCVII::hasVLOp(TSFlags)) { - MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2); + unsigned Offset = 2; + if (RISCVII::hasPolicyOp(TSFlags)) + Offset = 3; + MachineOperand &VLOp = + MI.getOperand(MI.getNumExplicitOperands() - Offset); if (VLOp.isReg()) { // Erase the AVL operand from the instruction. VLOp.setReg(RISCV::NoRegister); diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -178,6 +178,9 @@ bit HasVLOp = 0; let TSFlags{15} = HasVLOp; + + bit HasPolicyOp = false; + let TSFlags{16} = HasPolicyOp; } // Pseudo instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -642,7 +642,7 @@ Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, GPR:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, uimm5:$policy),[]>, RISCVVPseudo, RISCVVLE.val, VLMul> { let mayLoad = 1; @@ -652,6 +652,7 @@ let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; + let HasPolicyOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); } @@ -673,7 +674,7 @@ Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, GPR:$rs1, GPR:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, uimm5:$policy),[]>, RISCVVPseudo, RISCVVLE.val, VLMul> { let mayLoad = 1; @@ -683,6 +684,7 @@ let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; + let HasPolicyOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); } @@ -707,7 +709,7 @@ Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, GPR:$rs1, IdxClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, uimm5:$policy),[]>, RISCVVPseudo, RISCVVLX.val, VLMul, LMUL> { let mayLoad = 1; @@ -717,6 +719,7 @@ let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; + let HasPolicyOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); } @@ -860,6 +863,22 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoUnaryMaskTA : + Pseudo<(outs GetVRegNoV0.R:$rd), + (ins GetVRegNoV0.R:$merge, OpClass:$rs2, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, uimm5:$policy), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasPolicyOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + // mask unary operation without maskedoff class VPseudoMaskUnarySOutMask: Pseudo<(outs GPR:$rd), @@ -975,6 +994,26 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoBinaryMaskTA : + Pseudo<(outs 
GetVRegNoV0.R:$rd), + (ins GetVRegNoV0.R:$merge, + Op1Class:$rs2, Op2Class:$rs1, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, uimm5:$policy), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasPolicyOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + // Like VPseudoBinaryMask, but output can be V0. class VPseudoBinaryMOutMask.R:$rd), (ins GetVRegNoV0.R:$merge, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, uimm5:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1013,6 +1052,7 @@ let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 0; // Merge is also rs2. + let HasPolicyOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); } @@ -1491,8 +1531,8 @@ let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoBinaryNoMask; - def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask; + def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskTA; } } @@ -1519,8 +1559,8 @@ let VLMul = lmul.value in { def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask; - def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMask; + def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskTA; } } @@ -1712,6 +1752,15 @@ } } +multiclass VPseudoUnaryTAV_V { + foreach m = MxList.m in { + let VLMul = m.value in { + def "_V_" # m.MX : VPseudoUnaryNoMask; + def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA; + } + } +} + multiclass VPseudoUnaryV_V { foreach m = MxList.m in { let VLMul = m.value in { @@ -1727,8 +1776,8 @@ { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask; - def "_" # m.MX # "_MASK" : VPseudoUnaryMask; + def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA; } } } @@ -1739,8 +1788,8 @@ { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask; - def "_" # m.MX # "_MASK" : VPseudoUnaryMask; + def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA; } } } @@ -1751,8 +1800,8 @@ { let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask; - def "_" # m.MX # "_MASK" : VPseudoUnaryMask; + def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA; } } } @@ -2024,8 +2073,8 @@ string Constraint = ""> { let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMask; - def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask; + def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA; } } @@ -2208,6 +2257,26 @@ (op2_type op2_reg_class:$rs2), (mask_type V0), GPR:$vl, sew)>; +class VPatUnaryMaskTA : + Pat<(result_type (!cast(intrinsic_name#"_mask") + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + (mask_type V0), + VLOpFrag, (XLenVT uimm5:$policy))), + (!cast(inst#"_"#kind#"_"#vlmul.MX#"_MASK") + (result_type result_reg_class:$merge), + (op2_type op2_reg_class:$rs2), + (mask_type V0), GPR:$vl, sew, (XLenVT uimm5:$policy))>; + class VPatMaskUnaryNoMask : @@ -2309,6 +2378,28 @@ (op2_type op2_kind:$rs2), (mask_type V0), GPR:$vl, sew)>; +class VPatBinaryMaskTA : + Pat<(result_type (!cast(intrinsic_name#"_mask") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + VLOpFrag, (XLenVT uimm5:$policy))), + (!cast(inst#"_MASK") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), GPR:$vl, sew, (XLenVT uimm5:$policy))>; + // Same as above but source operands are swapped. 
class VPatBinaryMaskSwapped(inst#"_MASK_TIED") (result_type result_reg_class:$merge), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type V0), GPR:$vl, sew, (XLenVT uimm5:$policy))>; class VPatTernaryNoMask; - def : VPatUnaryMask; + def : VPatUnaryMaskTA; } } @@ -2517,9 +2608,9 @@ def : VPatUnaryNoMask; - def : VPatUnaryMask; + def : VPatUnaryMaskTA; } } @@ -2565,6 +2656,24 @@ op2_kind>; } +multiclass VPatBinaryTA +{ + def : VPatBinaryNoMask; + def : VPatBinaryMaskTA; +} + multiclass VPatBinarySwapped; } +multiclass VPatConversionTA +{ + def : VPatUnaryNoMask; + def : VPatUnaryMaskTA; +} + multiclass VPatBinaryV_VV vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryTA; } multiclass VPatBinaryV_VV_INT vtilist> { foreach vti = vtilist in { defvar ivti = GetIntVTypeInfo.Vti; - defm : VPatBinary; + defm : VPatBinaryTA; } } @@ -2675,10 +2801,10 @@ defvar emul_str = octuple_to_str.ret; defvar ivti = !cast("VI" # eew # emul_str); defvar inst = instruction # "_VV_" # vti.LMul.MX # "_" # emul_str; - defm : VPatBinary; + defm : VPatBinaryTA; } } } @@ -2687,29 +2813,29 @@ list vtilist> { foreach vti = vtilist in { defvar kind = "V"#vti.ScalarSuffix; - defm : VPatBinary; + defm : VPatBinaryTA; } } multiclass VPatBinaryV_VX_INT vtilist> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryTA; } multiclass VPatBinaryV_VI vtilist, Operand imm_type> { foreach vti = vtilist in - defm : VPatBinary; + defm : VPatBinaryTA; } multiclass VPatBinaryM_MM { @@ -2724,10 +2850,10 @@ foreach VtiToWti = vtilist in { defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; - defm : VPatBinary; + defm : VPatBinaryTA; } } @@ -2737,10 +2863,10 @@ defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; defvar kind = "V"#Vti.ScalarSuffix; - defm : VPatBinary; + defm : VPatBinaryTA; } } @@ -2756,10 +2882,10 @@ def : VPatTiedBinaryMask; - def : VPatBinaryMask; + def : VPatBinaryMaskTA; } } @@ -2769,10 +2895,10 @@ defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; defvar kind = "W"#Vti.ScalarSuffix; - defm : VPatBinary; + defm : VPatBinaryTA; } } @@ -2781,10 +2907,10 @@ foreach VtiToWti = vtilist in { defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; - defm : VPatBinary; + defm : VPatBinaryTA; } } @@ -2794,10 +2920,10 @@ defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; defvar kind = "W"#Vti.ScalarSuffix; - defm : VPatBinary; + defm : VPatBinaryTA; } } @@ -2806,10 +2932,10 @@ foreach VtiToWti = vtilist in { defvar Vti = VtiToWti.Vti; defvar Wti = VtiToWti.Wti; - defm : VPatBinary; + defm : VPatBinaryTA; } } @@ -3122,8 +3248,8 @@ } } -multiclass VPatConversionVI_VF +multiclass VPatClassifyVI_VF { foreach fvti = AllFloatVectors in { @@ -3135,6 +3261,19 @@ } } +multiclass VPatConversionVI_VF +{ + foreach fvti = AllFloatVectors in + { + defvar ivti = GetIntVTypeInfo.Vti; + + defm : VPatConversionTA; + } +} + multiclass VPatConversionVF_VI { @@ -3142,9 +3281,9 @@ { defvar ivti = GetIntVTypeInfo.Vti; - defm : VPatConversion; + defm : VPatConversionTA; } } @@ -3154,9 +3293,9 @@ defvar fvti = fvtiToFWti.Vti; defvar iwti = GetIntVTypeInfo.Vti; - defm : VPatConversion; + defm : VPatConversionTA; } } @@ -3166,9 +3305,9 @@ defvar vti = vtiToWti.Vti; defvar fwti = vtiToWti.Wti; - defm : VPatConversion; + defm : VPatConversionTA; } } @@ -3178,9 +3317,9 @@ defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - defm : VPatConversion; + defm : VPatConversionTA; } } @@ -3190,9 +3329,9 @@ defvar vti = vtiToWti.Vti; defvar fwti = vtiToWti.Wti; - defm : VPatConversion; + defm 
: VPatConversionTA; } } @@ -3202,9 +3341,9 @@ defvar fvti = fvtiToFWti.Vti; defvar iwti = GetIntVTypeInfo.Vti; - defm : VPatConversion; + defm : VPatConversionTA; } } @@ -3214,9 +3353,9 @@ defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; - defm : VPatConversion; + defm : VPatConversionTA; } } @@ -3418,14 +3557,16 @@ (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), - VLOpFrag)), + VLOpFrag, + (XLenVT uimm5:$policy))), (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, - vti.Log2SEW)>; + vti.Log2SEW, + (XLenVT uimm5:$policy))>; // Match VSUB with a small immediate to vadd.vi by negating the immediate. def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector vti.RegClass:$rs1), @@ -3439,14 +3580,16 @@ (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), (vti.Mask V0), - VLOpFrag)), + VLOpFrag, + (XLenVT uimm5:$policy))), (!cast("PseudoVADD_VI_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, (NegImm simm5_plus1:$rs2), (vti.Mask V0), GPR:$vl, - vti.Log2SEW)>; + vti.Log2SEW, + (XLenVT uimm5:$policy))>; } //===----------------------------------------------------------------------===// @@ -3667,17 +3810,17 @@ //===----------------------------------------------------------------------===// // 14.8. Vector Floating-Point Square-Root Instruction //===----------------------------------------------------------------------===// -defm PseudoVFSQRT : VPseudoUnaryV_V; +defm PseudoVFSQRT : VPseudoUnaryTAV_V; //===----------------------------------------------------------------------===// // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction //===----------------------------------------------------------------------===// -defm PseudoVFRSQRT7 : VPseudoUnaryV_V; +defm PseudoVFRSQRT7 : VPseudoUnaryTAV_V; //===----------------------------------------------------------------------===// // 14.10. Vector Floating-Point Reciprocal Estimate Instruction //===----------------------------------------------------------------------===// -defm PseudoVFREC7 : VPseudoUnaryV_V; +defm PseudoVFREC7 : VPseudoUnaryTAV_V; //===----------------------------------------------------------------------===// // 14.11. Vector Floating-Point Min/Max Instructions @@ -4363,7 +4506,7 @@ //===----------------------------------------------------------------------===// // 14.14. Vector Floating-Point Classify Instruction //===----------------------------------------------------------------------===// -defm : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">; +defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">; //===----------------------------------------------------------------------===// // 14.15. Vector Floating-Point Merge Instruction diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -22,6 +22,9 @@ // Helpers to define the VL patterns. 
//===----------------------------------------------------------------------===// +defvar TAIL_UNDISTURBED = 0; +defvar TAIL_AGNOSTIC = 1; + def SDT_RISCVVLE_VL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVT<2, XLenVT>]>; def SDT_RISCVVSE_VL : SDTypeProfile<0, 3, [SDTCisVec<0>, SDTCisPtrTy<1>, @@ -266,7 +269,7 @@ (result_type (IMPLICIT_DEF)), op_reg_class:$rs1, op_reg_class:$rs2, - VMV0:$vm, GPR:$vl, sew)>; + VMV0:$vm, GPR:$vl, sew, TAIL_AGNOSTIC)>; } multiclass VPatBinaryVL_XI; + VMV0:$vm, GPR:$vl, sew, TAIL_AGNOSTIC)>; } multiclass VPatBinaryVL_VV_VX { @@ -604,7 +607,7 @@ VLOpFrag), (!cast("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, - VMV0:$vm, GPR:$vl, vti.Log2SEW)>; + VMV0:$vm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag), @@ -615,7 +618,7 @@ VLOpFrag), (!cast("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, simm5:$rs2, - VMV0:$vm, GPR:$vl, vti.Log2SEW)>; + VMV0:$vm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } // 12.3. Vector Integer Extension @@ -1210,7 +1213,7 @@ VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>; + vti.Mask:$vm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; // emul = lmul * 16 / sew defvar vlmul = vti.LMul; @@ -1237,7 +1240,7 @@ VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1, - vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>; + vti.Mask:$vm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1281,7 +1284,7 @@ VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>; + vti.Mask:$vm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; defvar vlmul = vti.LMul; defvar octuple_lmul = vlmul.octuple; @@ -1307,7 +1310,7 @@ VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1, - vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>; + vti.Mask:$vm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp --- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -154,10 +154,14 @@ int OpNo = (int)MI->getOperandNo(&MO); assert(OpNo >= 0 && "Operand number doesn't fit in an 'int' type"); - // Skip VL and SEW operands which are the last two operands if present. - if (RISCVII::hasVLOp(TSFlags) && OpNo == (NumOps - 2)) + // Skip Policy, VL and SEW operands which are the last three operands if + // present. + if (RISCVII::hasPolicyOp(TSFlags) && OpNo == (NumOps - 1)) continue; - if (RISCVII::hasSEWOp(TSFlags) && OpNo == (NumOps - 1)) + unsigned PolicyOpOffset = RISCVII::hasPolicyOp(TSFlags) ? 1 : 0; + if (RISCVII::hasVLOp(TSFlags) && OpNo == (NumOps - 2 - PolicyOpOffset)) + continue; + if (RISCVII::hasSEWOp(TSFlags) && OpNo == (NumOps - 1 - PolicyOpOffset)) continue; // Skip merge op. It should be the first operand after the result. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s + +declare <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8( + <vscale x 8 x i8>, + <vscale x 8 x i8>, + i64); + +define <vscale x 8 x i8> @intrinsic_vadd_vv_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vadd_vv_nxv8i8_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8( + <vscale x 8 x i8> %0, + <vscale x 8 x i8> %1, + i64 %2) + + ret <vscale x 8 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vadd.mask.nxv8i8.nxv8i8( + <vscale x 8 x i8>, + <vscale x 8 x i8>, + <vscale x 8 x i8>, + <vscale x 8 x i1>, + i64, i64); + +define <vscale x 8 x i8> @intrinsic_vadd_mask_tu(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vadd_mask_tu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, mu +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i8> @llvm.riscv.vadd.mask.nxv8i8.nxv8i8( + <vscale x 8 x i8> %0, + <vscale x 8 x i8> %1, + <vscale x 8 x i8> %2, + <vscale x 8 x i1> %3, + i64 %4, i64 0) + + ret <vscale x 8 x i8> %a +} + +define <vscale x 8 x i8> @intrinsic_vadd_mask_ta(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vadd_mask_ta: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 3, e8, m1, ta, mu +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i8> @llvm.riscv.vadd.mask.nxv8i8.nxv8i8( + <vscale x 8 x i8> %0, + <vscale x 8 x i8> %1, + <vscale x 8 x i8> %2, + <vscale x 8 x i1> %3, + i64 %4, i64 1) + + ret <vscale x 8 x i8> %a +}
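
For reference, the user-visible shape of the tail-policy API added by this patch: masked intrinsics gain a _mt spelling whose trailing argument selects the tail policy, and RISCVVEmitter re-emits the old _m names as macros that forward to the _mt builtin with VE_TAIL_AGNOSTIC appended (emitIntrinsicMaskMacro). The snippet below is a minimal usage sketch, not part of the patch; it assumes a clang built with this change so that riscv_vector.h provides the _mt intrinsics and the VE_TAIL_* constants, and the add_masked_* wrapper names are made up for illustration.

// Minimal usage sketch (illustrative only; assumes a toolchain with this
// patch applied). The add_masked_* wrapper names are hypothetical.
#include <riscv_vector.h>
#include <stddef.h>

// Tail-undisturbed: elements past vl keep their values from maskedoff.
vint8m1_t add_masked_tu(vbool8_t mask, vint8m1_t maskedoff,
                        vint8m1_t op1, vint8m1_t op2, size_t vl) {
  return vadd_vv_i8m1_mt(mask, maskedoff, op1, op2, vl, VE_TAIL_UNDISTURBED);
}

// Tail-agnostic: the legacy _m spelling now expands to the _mt builtin with
// VE_TAIL_AGNOSTIC, so this emits the same IR as passing the policy
// explicitly (a trailing i64 1 on @llvm.riscv.vadd.mask.*).
vint8m1_t add_masked_ta(vbool8_t mask, vint8m1_t maskedoff,
                        vint8m1_t op1, vint8m1_t op2, size_t vl) {
  return vadd_vv_i8m1_m(mask, maskedoff, op1, op2, vl);
}

On the MI side the policy is carried as one extra immediate operand after SEW (HasPolicyOp in TSFlags), which is why RISCVInsertVSETVLI reads the tail-agnostic bit from the last operand and RISCVMCInstLower skips it when lowering to MCInst.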