diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -148,6 +148,12 @@ [LLVMPointerType>, llvm_anyint_ty], [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; + // Input: (undisturbed, pointer, vl) + class RISCVUSLoadTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMPointerType>, + llvm_anyint_ty], + [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; // For unit stride fault-only-first load // Input: (pointer, vl) // Output: (data, vl) @@ -158,6 +164,13 @@ [LLVMPointerType>, LLVMMatchType<1>], [NoCapture>]>, RISCVVIntrinsic; + // Input: (undisturbed, pointer, vl) + class RISCVUSLoadFFTU + : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty], + [LLVMMatchType<0>, LLVMPointerType>, + LLVMMatchType<1>], + [NoCapture>]>, + RISCVVIntrinsic; // For unit stride load with mask // Input: (maskedoff, pointer, mask, vl, ta) class RISCVUSLoadMask @@ -187,6 +200,12 @@ [LLVMPointerType>, llvm_anyint_ty, LLVMMatchType<1>], [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; + // Input: (undisturbed, pointer, stride, vl) + class RISCVSLoadTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMPointerType>, + llvm_anyint_ty, LLVMMatchType<1>], + [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; // For strided load with mask // Input: (maskedoff, pointer, stride, mask, vl, ta) class RISCVSLoadMask @@ -204,6 +223,12 @@ [LLVMPointerType>, llvm_anyvector_ty, llvm_anyint_ty], [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; + // Input: (undisturbed, pointer, index, vl) + class RISCVILoadTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMPointerType>, + llvm_anyvector_ty, llvm_anyint_ty], + [NoCapture>, IntrReadMem]>, RISCVVIntrinsic; // For indexed load with mask // Input: (maskedoff, pointer, index, mask, vl, ta) class RISCVILoadMask @@ -269,6 +294,11 @@ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vl) + class RISCVUnaryAATU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first source vector (with mask). // Input: (vector_in, mask, vl, ta) class RISCVUnaryAAMask @@ -294,6 +324,12 @@ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, int_vector_in, vl) + class RISCVRGatherVVTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first and second source vector. // Input: (vector_in, vector_in, int_vector_in, vl, ta) class RISCVRGatherVVMask @@ -308,6 +344,13 @@ [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, int16_vector_in, vl) + class RISCVRGatherEI16VVTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first and second source vector. 
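All of the new classes above follow one convention: the _tu variant prepends an "undisturbed" operand of the result type and otherwise keeps the operand list of the existing unmasked intrinsic. As a minimal sketch of how such an intrinsic is consumed from C++ (not part of the patch; it assumes the patch is applied so that Intrinsic::riscv_vle_tu is generated, and the helper name and the nxv2i32/i64 types are illustrative only):

// Sketch only -- not part of this patch. Builds a call to the new
// tail-undisturbed unit-stride load, llvm.riscv.vle.tu.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static Value *emitVleTU(IRBuilder<> &B, Module &M, Value *Undisturbed,
                        Value *Ptr, Value *VL) {
  // nxv2i32 result/merge type; XLen assumed to be i64 here.
  Type *VecTy = ScalableVectorType::get(B.getInt32Ty(), /*MinNumElts=*/2);
  Type *XLenTy = B.getInt64Ty();
  // Overloaded types are the result vector type and the vl type.
  Function *VleTU =
      Intrinsic::getDeclaration(&M, Intrinsic::riscv_vle_tu, {VecTy, XLenTy});
  // Operand order matches RISCVUSLoadTU above: (undisturbed, pointer, vl).
  // Ptr is expected to already have type <vscale x 2 x i32>*.
  return B.CreateCall(VleTU, {Undisturbed, Ptr, VL});
}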
// Input: (vector_in, vector_in, int16_vector_in, vl, ta) class RISCVRGatherEI16VVMask @@ -325,6 +368,13 @@ [LLVMMatchType<0>, llvm_anyint_ty, LLVMMatchType<1>], [IntrNoMem]>, RISCVVIntrinsic { } + // Input: (undisturbed, vector_in, xlen_in, vl) + class RISCVGatherVXTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, + LLVMMatchType<1>], + [IntrNoMem]>, RISCVVIntrinsic { + } // For destination vector type is the same as first source vector (with mask). // Second operand is XLen. // Input: (maskedoff, vector_in, xlen_in, mask, vl, ta) @@ -343,6 +393,13 @@ [IntrNoMem]>, RISCVVIntrinsic { let SplatOperand = 2; } + // Input: (undisturbed, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryAAXTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let SplatOperand = 3; + } // For destination vector type is the same as first source vector (with mask). // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVBinaryAAXMask @@ -360,6 +417,11 @@ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryAAShiftTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) @@ -377,6 +439,13 @@ [IntrNoMem]>, RISCVVIntrinsic { let SplatOperand = 2; } + // Input: (undisturbed, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryABXTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let SplatOperand = 3; + } // For destination vector type is NOT the same as first source vector (with mask). // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVBinaryABXMask @@ -394,6 +463,11 @@ : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vector_in/scalar_in, vl) + class RISCVBinaryABShiftTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is NOT the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) @@ -413,6 +487,15 @@ [IntrNoMem]>, RISCVVIntrinsic { let SplatOperand = 2; } + // Input: (undisturbed, vector_in, vector_in/scalar_in, V0, vl) + class RISCVBinaryWithV0TU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let SplatOperand = 3; + } // For binary operations with mask type output and V0 as input. 
// Output: (mask type output) // Input: (vector_in, vector_in/scalar_in, V0, vl) @@ -460,6 +543,11 @@ : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], [llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vl) + class RISCVClassifyTU + : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], + [LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For FP classify operations with mask. // Output: (bit mask type output) // Input: (maskedoff, vector_in, mask, vl) @@ -477,6 +565,13 @@ [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { let SplatOperand = 2; } + // Input: (undisturbed, vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryAAXTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { + let SplatOperand = 3; + } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) @@ -496,6 +591,11 @@ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryAAShiftTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. @@ -514,6 +614,11 @@ : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryABShiftTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; // For Saturating binary operations with mask. // The destination vector type is NOT the same as first source vector (with mask). // The second source operand matches the destination type or is an XLen scalar. @@ -599,6 +704,11 @@ : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vl) + class RISCVUnaryABTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is NOT the same as source vector (with mask). 
// Input: (maskedoff, vector_in, mask, vl, ta) class RISCVUnaryABMask @@ -614,6 +724,11 @@ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vl) + class RISCVUnaryTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For mask unary operations with mask type in/out with mask // Output: (mask type output) // Input: (mask type maskedoff, mask type vector_in, mask, vl) @@ -634,6 +749,11 @@ : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // Input: (undisturbed, vector_in, vl) + class RISCVConversionTU + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For Conversion unary operations with mask. // Input: (maskedoff, vector_in, mask, vl, ta) class RISCVConversionMask @@ -802,18 +922,22 @@ multiclass RISCVUSLoad { def "int_riscv_" # NAME : RISCVUSLoad; + def "int_riscv_" # NAME # "_tu" : RISCVUSLoadTU; def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask; } multiclass RISCVUSLoadFF { def "int_riscv_" # NAME : RISCVUSLoadFF; + def "int_riscv_" # NAME # "_tu" : RISCVUSLoadFFTU; def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask; } multiclass RISCVSLoad { def "int_riscv_" # NAME : RISCVSLoad; + def "int_riscv_" # NAME # "_tu" : RISCVSLoadTU; def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask; } multiclass RISCVILoad { def "int_riscv_" # NAME : RISCVILoad; + def "int_riscv_" # NAME # "_tu" : RISCVILoadTU; def "int_riscv_" # NAME # "_mask" : RISCVILoadMask; } multiclass RISCVUSStore { @@ -831,50 +955,60 @@ } multiclass RISCVUnaryAA { def "int_riscv_" # NAME : RISCVUnaryAANoMask; + def "int_riscv_" # NAME # "_tu" : RISCVUnaryAATU; def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMask; } multiclass RISCVUnaryAB { def "int_riscv_" # NAME : RISCVUnaryABNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVUnaryABTU; def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMask; } // AAX means the destination type(A) is the same as the first source // type(A). X means any type for the second source operand. multiclass RISCVBinaryAAX { def "int_riscv_" # NAME : RISCVBinaryAAXNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVBinaryAAXTU; def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMask; } // Like RISCVBinaryAAX, but the second operand is used a shift amount so it // must be a vector or an XLen scalar. multiclass RISCVBinaryAAShift { def "int_riscv_" # NAME : RISCVBinaryAAShiftNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVBinaryAAShiftTU; def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAShiftMask; } multiclass RISCVRGatherVV { def "int_riscv_" # NAME : RISCVRGatherVVNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVRGatherVVTU; def "int_riscv_" # NAME # "_mask" : RISCVRGatherVVMask; } multiclass RISCVRGatherVX { def "int_riscv_" # NAME : RISCVGatherVXNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVGatherVXTU; def "int_riscv_" # NAME # "_mask" : RISCVGatherVXMask; } multiclass RISCVRGatherEI16VV { def "int_riscv_" # NAME : RISCVRGatherEI16VVNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVRGatherEI16VVTU; def "int_riscv_" # NAME # "_mask" : RISCVRGatherEI16VVMask; } // ABX means the destination type(A) is different from the first source // type(B). X means any type for the second source operand. 
multiclass RISCVBinaryABX { def "int_riscv_" # NAME : RISCVBinaryABXNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVBinaryABXTU; def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMask; } // Like RISCVBinaryABX, but the second operand is used a shift amount so it // must be a vector or an XLen scalar. multiclass RISCVBinaryABShift { def "int_riscv_" # NAME : RISCVBinaryABShiftNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVBinaryABShiftTU; def "int_riscv_" # NAME # "_mask" : RISCVBinaryABShiftMask; } multiclass RISCVBinaryWithV0 { def "int_riscv_" # NAME : RISCVBinaryWithV0; + def "int_riscv_" # NAME # "_tu" : RISCVBinaryWithV0TU; } multiclass RISCVBinaryMaskOutWithV0 { def "int_riscv_" # NAME : RISCVBinaryMOutWithV0; @@ -884,22 +1018,27 @@ } multiclass RISCVSaturatingBinaryAAX { def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVSaturatingBinaryAAXTU; def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMask; } multiclass RISCVSaturatingBinaryAAShift { def "int_riscv_" # NAME : RISCVSaturatingBinaryAAShiftNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVSaturatingBinaryAAShiftTU; def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAShiftMask; } multiclass RISCVSaturatingBinaryABShift { def "int_riscv_" # NAME : RISCVSaturatingBinaryABShiftNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVSaturatingBinaryABShiftTU; def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABShiftMask; } multiclass RISCVTernaryAAAX { def "int_riscv_" # NAME : RISCVTernaryAAAXNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVTernaryAAAXNoMask; def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAAXMask; } multiclass RISCVTernaryAAXA { def "int_riscv_" # NAME : RISCVTernaryAAXANoMask; + def "int_riscv_" # NAME # "_tu" : RISCVTernaryAAXANoMask; def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMask; } multiclass RISCVCompare { @@ -908,14 +1047,17 @@ } multiclass RISCVClassify { def "int_riscv_" # NAME : RISCVClassifyNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVClassifyTU; def "int_riscv_" # NAME # "_mask" : RISCVClassifyMask; } multiclass RISCVTernaryWide { def "int_riscv_" # NAME : RISCVTernaryWideNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVTernaryWideNoMask; def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMask; } multiclass RISCVReduction { def "int_riscv_" # NAME : RISCVReductionNoMask; + def "int_riscv_" # NAME # "_tu" : RISCVReductionNoMask; def "int_riscv_" # NAME # "_mask" : RISCVReductionMask; } multiclass RISCVMaskUnarySOut { @@ -928,6 +1070,7 @@ } multiclass RISCVConversion { def "int_riscv_" #NAME :RISCVConversionNoMask; + def "int_riscv_" #NAME # "_tu" :RISCVConversionTU; def "int_riscv_" # NAME # "_mask" : RISCVConversionMask; } multiclass RISCVAMO { @@ -1080,12 +1223,24 @@ def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + def int_riscv_vmv_v_v_tu : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; def int_riscv_vmv_v_x : Intrinsic<[llvm_anyint_ty], [LLVMVectorElementType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + def int_riscv_vmv_v_x_tu : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMVectorElementType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; def int_riscv_vfmv_v_f : Intrinsic<[llvm_anyfloat_ty], [LLVMVectorElementType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + def int_riscv_vfmv_v_f_tu : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, 
LLVMVectorElementType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; def int_riscv_vmv_x_s : Intrinsic<[LLVMVectorElementType<0>], [llvm_anyint_ty], @@ -1151,6 +1306,7 @@ defm vrgatherei16_vv : RISCVRGatherEI16VV; def "int_riscv_vcompress" : RISCVUnaryAAMaskNoTA; + def "int_riscv_vcompress_tu" : RISCVUnaryAAMaskNoTA; defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; @@ -1240,6 +1396,13 @@ llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; // Output: (vector) + // Input: (undisturbed, mask type input, vl) + def int_riscv_viota_tu : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // Output: (vector) // Input: (maskedoff, mask type vector_in, mask, vl) def int_riscv_viota_mask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1251,6 +1414,13 @@ // Input: (vl) def int_riscv_vid : RISCVNullaryIntrinsic; + // Output: (vector) + // Input: (undisturbed, vl) + def int_riscv_vid_tu : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // Output: (vector) // Input: (maskedoff, mask, vl) def int_riscv_vid_mask : Intrinsic<[llvm_anyvector_ty], diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -159,6 +159,7 @@ struct VLEPseudo { uint16_t Masked : 1; + uint16_t TailUndisturbed : 1; uint16_t Strided : 1; uint16_t FF : 1; uint16_t Log2SEW : 3; @@ -176,6 +177,7 @@ struct VLX_VSXPseudo { uint16_t Masked : 1; + uint16_t TailUndisturbed : 1; uint16_t Ordered : 1; uint16_t Log2SEW : 3; uint16_t LMUL : 3; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -50,14 +50,19 @@ // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector // load. Done after lowering and combining so that we have a chance to // optimize this to VMV_V_X_VL when the upper bits aren't needed. - if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) + if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL && + N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_TU_VL) continue; - assert(N->getNumOperands() == 3 && "Unexpected number of operands"); + bool IsTailAgnostic = N->getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL; + assert(((IsTailAgnostic && N->getNumOperands() == 3) || + (!IsTailAgnostic && N->getNumOperands() == 4)) && + "Unexpected number of operands"); + unsigned ArgIndex = IsTailAgnostic ? 0 : 1; MVT VT = N->getSimpleValueType(0); - SDValue Lo = N->getOperand(0); - SDValue Hi = N->getOperand(1); - SDValue VL = N->getOperand(2); + SDValue Lo = N->getOperand(ArgIndex); + SDValue Hi = N->getOperand(ArgIndex + 1); + SDValue VL = N->getOperand(ArgIndex + 2); assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && "Unexpected VTs!"); @@ -84,11 +89,13 @@ Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); - SDValue IntID = - CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); - SDValue Ops[] = {Chain, IntID, StackSlot, - CurDAG->getRegister(RISCV::X0, MVT::i64), VL}; - + unsigned LoadInstr = + IsTailAgnostic ? 
Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_tu; + SDValue IntID = CurDAG->getTargetConstant(LoadInstr, DL, MVT::i64); + SmallVector Ops = {Chain, IntID}; + if (!IsTailAgnostic) + Ops.push_back(N->getOperand(0)); + Ops.append({StackSlot, CurDAG->getRegister(RISCV::X0, MVT::i64), VL}); SDValue Result = CurDAG->getMemIntrinsicNode( ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), MachineMemOperand::MOLoad); @@ -1142,20 +1149,25 @@ return; } case Intrinsic::riscv_vloxei: + case Intrinsic::riscv_vloxei_tu: case Intrinsic::riscv_vloxei_mask: case Intrinsic::riscv_vluxei: + case Intrinsic::riscv_vluxei_tu: case Intrinsic::riscv_vluxei_mask: { bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || IntNo == Intrinsic::riscv_vluxei_mask; bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || + IntNo == Intrinsic::riscv_vloxei_tu || IntNo == Intrinsic::riscv_vloxei_mask; + bool IsTailUndisturbed = IntNo == Intrinsic::riscv_vloxei_tu || + IntNo == Intrinsic::riscv_vluxei_tu; MVT VT = Node->getSimpleValueType(0); unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); unsigned CurOp = 2; SmallVector Operands; - if (IsMasked) + if (IsMasked || IsTailUndisturbed) Operands.push_back(Node->getOperand(CurOp++)); MVT IndexVT; @@ -1170,8 +1182,8 @@ RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( - IsMasked, IsOrdered, IndexLog2EEW, static_cast(LMUL), - static_cast(IndexLMUL)); + IsMasked, IsTailUndisturbed, IsOrdered, IndexLog2EEW, + static_cast(LMUL), static_cast(IndexLMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); @@ -1183,29 +1195,34 @@ } case Intrinsic::riscv_vlm: case Intrinsic::riscv_vle: + case Intrinsic::riscv_vle_tu: case Intrinsic::riscv_vle_mask: case Intrinsic::riscv_vlse: + case Intrinsic::riscv_vlse_tu: case Intrinsic::riscv_vlse_mask: { bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || IntNo == Intrinsic::riscv_vlse_mask; - bool IsStrided = - IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; + bool IsStrided = IntNo == Intrinsic::riscv_vlse || + IntNo == Intrinsic::riscv_vlse_mask || + IntNo == Intrinsic::riscv_vlse_tu; + bool IsTailUndisturbed = + IntNo == Intrinsic::riscv_vle_tu || IntNo == Intrinsic::riscv_vlse_tu; MVT VT = Node->getSimpleValueType(0); unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); unsigned CurOp = 2; SmallVector Operands; - if (IsMasked) + if (IsMasked || IsTailUndisturbed) Operands.push_back(Node->getOperand(CurOp++)); addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, Operands, /*IsLoad=*/true); RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); - const RISCV::VLEPseudo *P = - RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, - static_cast(LMUL)); + const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( + IsMasked, IsTailUndisturbed, IsStrided, /*FF*/ false, Log2SEW, + static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); @@ -1216,15 +1233,17 @@ return; } case Intrinsic::riscv_vleff: + case Intrinsic::riscv_vleff_tu: case Intrinsic::riscv_vleff_mask: { bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; + bool IsTailUndisturbed = IntNo == Intrinsic::riscv_vleff_tu; MVT VT = Node->getSimpleValueType(0); unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); unsigned CurOp = 2; SmallVector Operands; - if (IsMasked) + if (IsMasked || IsTailUndisturbed) 
Operands.push_back(Node->getOperand(CurOp++)); addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, @@ -1232,9 +1251,9 @@ /*IsLoad=*/true); RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); - const RISCV::VLEPseudo *P = - RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, Log2SEW, - static_cast(LMUL)); + const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( + IsMasked, IsTailUndisturbed, /*Strided*/ false, /*FF*/ true, Log2SEW, + static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), MVT::Other, MVT::Glue, Operands); @@ -1360,7 +1379,7 @@ RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( - IsMasked, IsOrdered, IndexLog2EEW, static_cast(LMUL), + IsMasked, false, IsOrdered, IndexLog2EEW, static_cast(LMUL), static_cast(IndexLMUL)); MachineSDNode *Store = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); @@ -1546,8 +1565,8 @@ RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( - /*IsMasked*/ false, /*IsStrided*/ true, /*FF*/ false, Log2SEW, - static_cast(LMUL)); + /*IsMasked*/ false, /*IsTailUndisturbed*/ false, /*IsStrided*/ true, + /*FF*/ false, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -119,12 +119,16 @@ BDECOMPRESS, BDECOMPRESSW, // Vector Extension - // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand - // for the VL value to be used for the operation. + // VMV_V_X_VL and VMV_V_X_TU_VL match the semantics of vmv.v.x but includes an + // extra operand for the VL value to be used for the operation. VMV_V_X_TU_VL + // is for tail undisturbed. VMV_V_X_VL, - // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand - // for the VL value to be used for the operation. + VMV_V_X_TU_VL, + // VFMV_V_F_VL and VFMV_V_F_TU_VL match the semantics of vfmv.v.f but includes + // an extra operand for the VL value to be used for the operation. + // VFMV_V_F_TU_VL is for tail undisturbed. VFMV_V_F_VL, + VFMV_V_F_TU_VL, // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign // extended from the vector element size. VMV_X_S, @@ -137,7 +141,9 @@ SPLAT_VECTOR_I64, // Splats an 64-bit value that has been split into two i32 parts. This is // expanded late to two scalar stores and a stride 0 vector load. + // SPLAT_VECTOR_SPLIT_I64_TU_VL is for tail undisturbed. SPLAT_VECTOR_SPLIT_I64_VL, + SPLAT_VECTOR_SPLIT_I64_TU_VL, // Read VLENB CSR READ_VLENB, // Truncates a RVV integer vector by one power-of-two. 
Carries both an extra diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2162,42 +2162,53 @@ return SDValue(); } -static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo, - SDValue Hi, SDValue VL, SelectionDAG &DAG) { +static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Merge, + SDValue Lo, SDValue Hi, SDValue VL, + SelectionDAG &DAG) { if (isa(Lo) && isa(Hi)) { int32_t LoC = cast(Lo)->getSExtValue(); int32_t HiC = cast(Hi)->getSExtValue(); // If Hi constant is all the same sign bit as Lo, lower this as a custom // node in order to try and match RVV vector/scalar instructions. - if ((LoC >> 31) == HiC) + if ((LoC >> 31) == HiC) { + if (Merge) + return DAG.getNode(RISCVISD::VMV_V_X_TU_VL, DL, VT, Merge, Lo, VL); return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL); + } } // Fall back to a stack store and stride x0 vector load. + if (Merge) + return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_TU_VL, DL, VT, Merge, + Lo, Hi, VL); return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL); } // Called by type legalization to handle splat of i64 on RV32. // FIXME: We can optimize this when the type has sign or zero bits in one // of the halves. -static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar, - SDValue VL, SelectionDAG &DAG) { +static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Merge, + SDValue Scalar, SDValue VL, + SelectionDAG &DAG) { assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, DAG.getConstant(1, DL, MVT::i32)); - return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG); + return splatPartsI64WithVL(DL, VT, Merge, Lo, Hi, VL, DAG); } // This function lowers a splat of a scalar operand Splat with the vector // length VL. It ensures the final sequence is type legal, which is useful when // lowering a splat after type legalization. -static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL, - SelectionDAG &DAG, +static SDValue lowerScalarSplat(SDValue Merge, SDValue Scalar, SDValue VL, + MVT VT, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - if (VT.isFloatingPoint()) + if (VT.isFloatingPoint()) { + if (Merge) + return DAG.getNode(RISCVISD::VFMV_V_F_TU_VL, DL, VT, Merge, Scalar, VL); return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL); + } MVT XLenVT = Subtarget.getXLenVT(); @@ -2210,6 +2221,8 @@ unsigned ExtOpc = isa(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); + if (Merge) + return DAG.getNode(RISCVISD::VMV_V_X_TU_VL, DL, VT, Merge, Scalar, VL); return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL); } @@ -2217,7 +2230,7 @@ "Unexpected scalar for splat lowering!"); // Otherwise use the more complicated splatting algorithm. - return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); + return splatSplitI64WithVL(DL, VT, Merge, Scalar, VL, DAG); } static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, @@ -2390,7 +2403,8 @@ // TODO: This doesn't trigger for i64 vectors on RV32, since there we // encounter a bitcasted BUILD_VECTOR with low/high i32 values. 
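For reference, a sketch (not taken from this patch) of the kind of call whose lowering the changes below extend: a tail-undisturbed i64 splat built through llvm.riscv.vmv.v.x.tu, which on RV32 is what reaches splatSplitI64WithVL with a non-null Merge and selects the new SPLAT_VECTOR_SPLIT_I64_TU_VL node. The helper name and the nxv1i64 type are assumptions for the example:

// Sketch only -- not part of this patch. Builds a tail-undisturbed integer
// splat via llvm.riscv.vmv.v.x.tu, assuming this patch provides the intrinsic.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static Value *emitVmvVXTU(IRBuilder<> &B, Module &M, Value *Merge,
                          Value *Scalar, Value *VL) {
  // nxv1i64 result; with an i64 Scalar on RV32 this is the case that falls
  // back to the stack-store-plus-strided-load splat handled above.
  Type *VecTy = ScalableVectorType::get(B.getInt64Ty(), /*MinNumElts=*/1);
  // Overloaded types are the result vector type and the vl type.
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::riscv_vmv_v_x_tu,
                                          {VecTy, VL->getType()});
  // Operand order per int_riscv_vmv_v_x_tu: (undisturbed, scalar, vl);
  // Scalar must have the element type (i64 here).
  return B.CreateCall(F, {Merge, Scalar, VL});
}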
if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { - Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget); + Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, + Subtarget); } else { V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); // If only one index is used, we can use a "splat" vrgather. @@ -3712,7 +3726,8 @@ std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG); + SDValue Res = + splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); return convertFromScalableVector(VecVT, Res, DAG, Subtarget); } @@ -4080,7 +4095,7 @@ // VL should be the last operand. SDValue VL = Op.getOperand(Op.getNumOperands() - 1); assert(VL.getValueType() == XLenVT); - ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG); + ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG); return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); } @@ -4124,11 +4139,18 @@ return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), Op.getOperand(1)); case Intrinsic::riscv_vmv_v_x: - return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), + return lowerScalarSplat(SDValue(), Op.getOperand(1), Op.getOperand(2), Op.getSimpleValueType(), DL, DAG, Subtarget); + case Intrinsic::riscv_vmv_v_x_tu: + return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), + Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, + Subtarget); case Intrinsic::riscv_vfmv_v_f: return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::riscv_vfmv_v_f_tu: + return DAG.getNode(RISCVISD::VFMV_V_F_TU_VL, DL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::riscv_vmv_s_x: { SDValue Scalar = Op.getOperand(2); @@ -4159,7 +4181,8 @@ SDValue Vec = Op.getOperand(1); SDValue VL = Op.getOperand(3); - SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); + SDValue SplattedVal = + splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getConstant(0, DL, MVT::i32), VL); @@ -4505,8 +4528,9 @@ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); - SDValue IdentitySplat = lowerScalarSplat( - NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget); + SDValue IdentitySplat = + lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT), + M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec, IdentitySplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, @@ -4564,8 +4588,9 @@ SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - SDValue ScalarSplat = lowerScalarSplat( - ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget); + SDValue ScalarSplat = + lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT), + M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), VectorVal, ScalarSplat, Mask, VL); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, @@ -4631,9 +4656,9 @@ MVT XLenVT = Subtarget.getXLenVT(); MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? 
VecEltVT : XLenVT; - SDValue StartSplat = - lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT, - DL, DAG, Subtarget); + SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0), + DAG.getConstant(1, DL, XLenVT), M1VT, + DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, @@ -4936,8 +4961,8 @@ StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal); } else { SDValue StepVal = lowerScalarSplat( - DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT, - DL, DAG, Subtarget); + SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), + VL, VT, DL, DAG, Subtarget); StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal); } } @@ -9459,12 +9484,15 @@ NODE_NAME_CASE(BDECOMPRESS) NODE_NAME_CASE(BDECOMPRESSW) NODE_NAME_CASE(VMV_V_X_VL) + NODE_NAME_CASE(VMV_V_X_TU_VL) NODE_NAME_CASE(VFMV_V_F_VL) + NODE_NAME_CASE(VFMV_V_F_TU_VL) NODE_NAME_CASE(VMV_X_S) NODE_NAME_CASE(VMV_S_X_VL) NODE_NAME_CASE(VFMV_S_F_VL) NODE_NAME_CASE(SPLAT_VECTOR_I64) NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) + NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_TU_VL) NODE_NAME_CASE(READ_VLENB) NODE_NAME_CASE(TRUNCATE_VECTOR_VL) NODE_NAME_CASE(VSLIDEUP_VL) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -428,8 +428,9 @@ let PrimaryKeyName = "getRISCVVIntrinsicInfo"; } -class RISCVVLE S, bits<3> L> { +class RISCVVLE S, bits<3> L> { bits<1> Masked = M; + bits<1> TailUndisturbed = TU; bits<1> Strided = Str; bits<1> FF = F; bits<3> Log2SEW = S; @@ -440,8 +441,8 @@ def RISCVVLETable : GenericTable { let FilterClass = "RISCVVLE"; let CppTypeName = "VLEPseudo"; - let Fields = ["Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; - let PrimaryKey = ["Masked", "Strided", "FF", "Log2SEW", "LMUL"]; + let Fields = ["Masked", "TailUndisturbed", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; + let PrimaryKey = ["Masked", "TailUndisturbed", "Strided", "FF", "Log2SEW", "LMUL"]; let PrimaryKeyName = "getVLEPseudo"; } @@ -461,8 +462,9 @@ let PrimaryKeyName = "getVSEPseudo"; } -class RISCVVLX_VSX S, bits<3> L, bits<3> IL> { +class RISCVVLX_VSX S, bits<3> L, bits<3> IL> { bits<1> Masked = M; + bits<1> TailUndisturbed = TU; bits<1> Ordered = O; bits<3> Log2SEW = S; bits<3> LMUL = L; @@ -470,15 +472,15 @@ Pseudo Pseudo = !cast(NAME); } -class RISCVVLX S, bits<3> L, bits<3> IL> : - RISCVVLX_VSX; +class RISCVVLX S, bits<3> L, bits<3> IL> : + RISCVVLX_VSX; class RISCVVSX S, bits<3> L, bits<3> IL> : - RISCVVLX_VSX; + RISCVVLX_VSX; class RISCVVLX_VSXTable : GenericTable { let CppTypeName = "VLX_VSXPseudo"; - let Fields = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; - let PrimaryKey = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; + let Fields = ["Masked", "TailUndisturbed", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; + let PrimaryKey = ["Masked", "TailUndisturbed", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; } def RISCVVLXTable : RISCVVLX_VSXTable { @@ -582,11 +584,12 @@ !subst("_B32", "", !subst("_B64", "", !subst("_MASK", "", + !subst("_TU", "", !subst("_TIED", "", !subst("F16", "F", !subst("F32", "F", !subst("F64", "F", - !subst("Pseudo", "", PseudoInst)))))))))))))))))))); + !subst("Pseudo", "", PseudoInst))))))))))))))))))))); } // The destination vector 
register group for a masked vector instruction cannot @@ -632,7 +635,7 @@ Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLE.val, VLMul> { + RISCVVLE.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -642,13 +645,29 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoUSLoadTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLE.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoUSLoadMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, GPR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE.val, VLMul> { + RISCVVLE.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -664,7 +683,7 @@ Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLE.val, VLMul> { + RISCVVLE.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -674,13 +693,29 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoSLoadTU: + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLE.val, VLMul> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoSLoadMask: Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, GPR:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE.val, VLMul> { + RISCVVLE.val, VLMul> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -697,7 +732,7 @@ Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>, RISCVVPseudo, - RISCVVLX.val, VLMul, LMUL> { + RISCVVLX.val, VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -708,6 +743,23 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoILoadTU LMUL, + bit Ordered, bit EarlyClobber>: + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>, + RISCVVPseudo, + RISCVVLX.val, VLMul, LMUL> { + let mayLoad = 1; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest"); + let HasMergeOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoILoadMask LMUL, bit Ordered, bit EarlyClobber>: Pseudo<(outs GetVRegNoV0.R:$rd), @@ -715,7 +767,7 @@ GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLX.val, VLMul, LMUL> { + RISCVVLX.val, VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -795,6 +847,21 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoUnaryNoDummyMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; + let BaseInstr = 
!cast(PseudoToVInst.VInst); +} + class VPseudoNullaryNoMask: Pseudo<(outs RegClass:$rd), (ins AVL:$vl, ixlenimm:$sew), @@ -808,6 +875,21 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoNullaryNoMaskTU: + Pseudo<(outs RegClass:$rd), + (ins RegClass:$dest, AVL:$vl, ixlenimm:$sew), + []>, RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let Constraints = "$rd = $dest"; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoNullaryMask: Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, VMaskOp:$vm, AVL:$vl, @@ -852,6 +934,21 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoUnaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $dest"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let HasMergeOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoUnaryMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$merge, OpClass:$rs2, @@ -915,6 +1012,24 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoUnaryAnyMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, + Op1Class:$rs2, + VR:$vm, AVL:$vl, ixlenimm:$sew), + []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "@earlyclobber $rd, $rd = $merge"; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoBinaryNoMask(PseudoToVInst.VInst); } +class VPseudoBinaryTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$dest, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $dest"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoTiedBinaryNoMask : @@ -950,6 +1083,24 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoTiedBinaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $rs2"], ",">.ret; + let Uses = [VL, VTYPE]; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + // Not convertible to 3 address. 
+ let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoIStoreNoMask LMUL, bit Ordered>: Pseudo<(outs), @@ -1083,6 +1234,29 @@ let VLMul = MInfo.value; } +class VPseudoBinaryCarryInTU : + Pseudo<(outs RetClass:$rd), + !if(CarryIn, + (ins RetClass:$dest, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, + ixlenimm:$sew), + (ins RetClass:$dest, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $dest"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); + let VLMul = MInfo.value; +} + class VPseudoTernaryNoMask(PseudoToVInst.VInst); } +class VPseudoTernaryNoMaskTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, + AVL:$vl, ixlenimm:$sew), + []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoTernaryNoMaskWithPolicy(PseudoToVInst.VInst); } +class VPseudoTernaryTU : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, + AVL:$vl, ixlenimm:$sew), + []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasDummyMask = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoAMOWDNoMask : Pseudo<(outs GetVRegNoV0.R:$vd_wd), @@ -1385,6 +1599,8 @@ def "E" # eew # "_V_" # LInfo : VPseudoUSLoadNoMask, VLESched; + def "E" # eew # "_V_" # LInfo # "_TU" : + VPseudoUSLoadTU; def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSLoadMask, VLESched; @@ -1402,6 +1618,8 @@ def "E" # eew # "FF_V_" # LInfo : VPseudoUSLoadNoMask, VLFSched; + def "E" # eew # "FF_V_" # LInfo # "_TU" : + VPseudoUSLoadTU; def "E" # eew # "FF_V_" # LInfo # "_MASK" : VPseudoUSLoadMask, VLFSched; @@ -1426,6 +1644,8 @@ let VLMul = lmul.value in { def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask, VLSSched; + def "E" # eew # "_V_" # LInfo # "_TU" : VPseudoSLoadTU, + VLSSched; def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask, VLSSched; } @@ -1452,6 +1672,9 @@ def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo : VPseudoILoadNoMask, VLXSched; + def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_TU" : + VPseudoILoadTU, + VLXSched; def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : VPseudoILoadMask, VLXSched; @@ -1570,6 +1793,8 @@ let VLMul = m.value in { def "_V_" # m.MX : VPseudoNullaryNoMask, Sched<[WriteVMIdxV, ReadVMask]>; + def "_V_" # m.MX # "_TU" : VPseudoNullaryNoMaskTU, + Sched<[WriteVMIdxV, ReadVMask]>; def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask, Sched<[WriteVMIdxV, ReadVMask]>; } @@ -1590,6 +1815,8 @@ let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; + def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMask, Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; } @@ -1601,6 +1828,8 @@ let VLMul = m.value in def _VM # "_" # m.MX : VPseudoUnaryAnyMask, Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>; + def _VM # "_" # m.MX # "_TU" : VPseudoUnaryAnyMaskTU, + Sched<[WriteVCompressV, 
ReadVCompressV, ReadVCompressV]>; } } @@ -1612,6 +1841,8 @@ let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoBinaryNoMask; + def "_" # MInfo.MX # "_TU" : VPseudoBinaryTU; def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskTA; } @@ -1640,6 +1871,8 @@ let VLMul = lmul.value in { def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask; + def "_" # lmul.MX # "_" # emul.MX # "_TU" : VPseudoBinaryTU; def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskTA; } @@ -1652,6 +1885,8 @@ let VLMul = MInfo.value in { def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask; + def "_" # MInfo.MX # "_TIED_TU": VPseudoTiedBinaryNoMaskTU; def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask; } @@ -1801,6 +2036,22 @@ m.vrclass, m.vrclass, m, CarryIn, Constraint>; } +multiclass VPseudoBinaryV_VM_WithTU { + foreach m = MxList.m in { + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoBinaryCarryInTU.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; + } +} + multiclass VPseudoBinaryV_XM { foreach m = MxList.m in @@ -1811,13 +2062,34 @@ m.vrclass, GPR, m, CarryIn, Constraint>; } +multiclass VPseudoBinaryV_XM_WithTU { + foreach m = MxList.m in { + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoBinaryCarryInTU.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; + } +} + multiclass VPseudoVMRG_FM { foreach m = MxList.m in - foreach f = FPList.fpinfo in + foreach f = FPList.fpinfo in { def "_V" # f.FX # "M_" # m.MX : VPseudoBinaryCarryIn.R, m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + def "_V" # f.FX # "M_" # m.MX # "_TU" : + VPseudoBinaryCarryInTU.R, + m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + } } multiclass VPseudoBinaryV_IM; } +multiclass VPseudoBinaryV_IM_WithTU { + foreach m = MxList.m in { + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoBinaryCarryInTU.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; + } +} + multiclass VPseudoUnaryVMV_V_X_I { foreach m = MxList.m in { let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoDummyMask, Sched<[WriteVIMovV, ReadVIMovV]>; + def "_V_" # m.MX # "_TU" : VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovV, ReadVIMovV]>; def "_X_" # m.MX : VPseudoUnaryNoDummyMask, Sched<[WriteVIMovX, ReadVIMovX]>; + def "_X_" # m.MX # "_TU" : VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovX, ReadVIMovX]>; def "_I_" # m.MX : VPseudoUnaryNoDummyMask, Sched<[WriteVIMovI]>; + def "_I_" # m.MX # "_TU" : VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovI]>; } } } @@ -1850,6 +2144,9 @@ def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask, Sched<[WriteVFMovV, ReadVFMovF]>; + def "_" # f.FX # "_" # m.MX # "_TU" : + VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVFMovV, ReadVFMovF]>; } } } @@ -1860,6 +2157,8 @@ let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; + def "_V_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask, 
Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; } @@ -1871,6 +2170,8 @@ let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; + def "_V_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; } @@ -1882,6 +2183,8 @@ let VLMul = m.value in { def "_V_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; + def "_V_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; } @@ -1895,6 +2198,8 @@ let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; @@ -1909,6 +2214,8 @@ let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; @@ -1923,6 +2230,8 @@ let VLMul = m.value in { def "_" # m.MX : VPseudoUnaryNoMask, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA, Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; @@ -2163,27 +2472,27 @@ } multiclass VPseudoVMRG_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, + defm "" : VPseudoBinaryV_VM_WithTU, Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, + defm "" : VPseudoBinaryV_XM_WithTU, Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, + defm "" : VPseudoBinaryV_IM_WithTU, Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; } multiclass VPseudoVCALU_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, + defm "" : VPseudoBinaryV_VM_WithTU, Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, + defm "" : VPseudoBinaryV_XM_WithTU, Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, + defm "" : VPseudoBinaryV_IM_WithTU, Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>; } multiclass VPseudoVCALU_VM_XM { - defm "" : VPseudoBinaryV_VM, + defm "" : VPseudoBinaryV_VM_WithTU, Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, + defm "" : VPseudoBinaryV_XM_WithTU, Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; } @@ -2244,6 +2553,7 @@ string Constraint = ""> { let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoTernaryNoMask; + def "_" # MInfo.MX # "_TU" : VPseudoTernaryNoMaskTU; def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask; } } @@ -2257,6 +2567,7 @@ let VLMul = MInfo.value in { let isCommutable = Commutable in def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy; + def "_" # MInfo.MX # "_TU" : VPseudoTernaryTU; def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask; } } @@ -2430,6 +2741,7 @@ string Constraint = ""> { let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMask; + def "_" # MInfo.MX # "_TU" : VPseudoUnaryNoMaskTU; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA; } @@ -2637,6 +2949,24 @@ (op2_type op2_reg_class:$rs2), GPR:$vl, 
sew)>; +class VPatUnaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name#"_tu") + (result_type result_reg_class:$dest), + (op2_type op2_reg_class:$rs2), + VLOpFrag)), + (!cast(inst#"_"#kind#"_"#vlmul.MX#"_TU") + (result_type result_reg_class:$dest), + (op2_type op2_reg_class:$rs2), + GPR:$vl, sew)>; + class VPatUnaryMask; +class VPatUnaryAnyMaskTU : + Pat<(result_type (!cast(intrinsic#"_tu") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (mask_type VR:$rs2), + VLOpFrag)), + (!cast(inst#"_"#kind#"_"#vlmul.MX#"_TU") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (mask_type VR:$rs2), + GPR:$vl, sew)>; + class VPatBinaryNoMask; +class VPatBinaryTU : + Pat<(result_type (!cast(intrinsic_name#"_tu") + (result_type result_reg_class:$dest), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TU") + (result_type result_reg_class:$dest), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + // Same as above but source operands are swapped. class VPatBinaryNoMaskSwapped; +class VPatTiedBinaryNoMaskTU : + Pat<(result_type (!cast(intrinsic_name#"_tu") + (result_type result_reg_class:$rs1), + (result_type result_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (XLenVT (VLOp GPR:$vl)))), + (!cast(inst#"_TIED_TU") + (result_type result_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + class VPatTiedBinaryMask; +class VPatTernaryNoMaskTU : + Pat<(result_type (!cast(intrinsic#"_tu") + (result_type result_reg_class:$rs3), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_"#kind#"_"#vlmul.MX#"_TU") + result_reg_class:$rs3, + (op1_type op1_reg_class:$rs1), + op2_kind:$rs2, + GPR:$vl, sew)>; + class VPatTernaryMask; + def : VPatUnaryAnyMaskTU; } } @@ -3002,6 +3416,8 @@ foreach vti = AllIntegerVectors in { def : VPatUnaryNoMask; + def : VPatUnaryNoMaskTU; def : VPatUnaryMask; } @@ -3017,6 +3433,9 @@ def : VPatUnaryNoMask; + def : VPatUnaryNoMaskTU; def : VPatUnaryMaskTA; @@ -3029,6 +3448,9 @@ def : VPatUnaryNoMask; + def : VPatUnaryNoMaskTU; def : VPatUnaryMaskTA; @@ -3042,6 +3464,12 @@ VLOpFrag)), (!cast(instruction#"_V_" # vti.LMul.MX) GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (!cast(intrinsic # "_tu") + (vti.Vector vti.RegClass:$dest), + VLOpFrag)), + (!cast(instruction#"_V_" # vti.LMul.MX # "_TU") + vti.RegClass:$dest, + GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (!cast(intrinsic # "_mask") (vti.Vector vti.RegClass:$merge), (vti.Mask V0), VLOpFrag)), @@ -3090,6 +3518,8 @@ { def : VPatBinaryNoMask; + def : VPatBinaryTU; def : VPatBinaryMaskTA; @@ -3136,6 +3566,32 @@ (mask_type V0), GPR:$vl, sew)>; } +multiclass VPatBinaryCarryInTU +{ + def : Pat<(result_type (!cast(intrinsic#"_tu") + (result_type result_reg_class:$dest), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), + VLOpFrag)), + (!cast(inst#"_"#kind#"_"#vlmul.MX#"_TU") + (result_type result_reg_class:$dest), + (op1_type op1_reg_class:$rs1), + (op2_type op2_kind:$rs2), + (mask_type V0), GPR:$vl, sew)>; +} + multiclass VPatBinaryMaskOut; + def : VPatUnaryNoMaskTU; def : VPatUnaryMask; } @@ -3187,6 +3645,8 @@ { def : VPatUnaryNoMask; + def : VPatUnaryNoMaskTU; def : VPatUnaryMaskTA; } @@ -3299,6 +3759,12 @@ def : VPatTiedBinaryNoMask; + def : VPatTiedBinaryNoMaskTU; + def : VPatBinaryTU; let AddedComplexity = 1 in def : VPatTiedBinaryMask; } +multiclass VPatBinaryV_VM_WithTU vtilist = AllIntegerVectors> { + foreach vti = vtilist in { + defm : 
VPatBinaryCarryIn; + defm : VPatBinaryCarryInTU; + } +} + multiclass VPatBinaryV_XM vtilist = AllIntegerVectors> { @@ -3383,6 +3866,25 @@ vti.RegClass, vti.ScalarRegClass>; } +multiclass VPatBinaryV_XM_WithTU vtilist = AllIntegerVectors> { + foreach vti = vtilist in { + defm : VPatBinaryCarryIn; + defm : VPatBinaryCarryInTU; + } +} + multiclass VPatBinaryV_IM { foreach vti = AllIntegerVectors in @@ -3393,6 +3895,22 @@ vti.RegClass, simm5>; } +multiclass VPatBinaryV_IM_WithTU { + foreach vti = AllIntegerVectors in { + defm : VPatBinaryCarryIn; + defm : VPatBinaryCarryInTU; + } +} + multiclass VPatBinaryV_V { foreach vti = AllIntegerVectors in defm : VPatBinaryMaskOut; multiclass VPatBinaryV_VM_XM_IM - : VPatBinaryV_VM, - VPatBinaryV_XM, - VPatBinaryV_IM; + : VPatBinaryV_VM_WithTU, + VPatBinaryV_XM_WithTU, + VPatBinaryV_IM_WithTU; multiclass VPatBinaryM_VM_XM_IM : VPatBinaryV_VM, @@ -3503,8 +4021,8 @@ VPatBinaryV_I; multiclass VPatBinaryV_VM_XM - : VPatBinaryV_VM, - VPatBinaryV_XM; + : VPatBinaryV_VM_WithTU, + VPatBinaryV_XM_WithTU; multiclass VPatBinaryM_VM_XM : VPatBinaryV_VM, @@ -3529,6 +4047,9 @@ def : VPatTernaryNoMask; + def : VPatTernaryNoMaskTU; def : VPatTernaryMask; @@ -3549,6 +4070,9 @@ def : VPatTernaryNoMaskWithPolicy; + def : VPatTernaryNoMaskTU; def : VPatTernaryMask; @@ -4001,6 +4525,16 @@ vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (int_riscv_vrsub_tu (vti.Vector vti.RegClass:$dest), + (vti.Vector vti.RegClass:$rs2), + (vti.Vector vti.RegClass:$rs1), + VLOpFrag)), + (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_TU") + vti.RegClass:$dest, + vti.RegClass:$rs1, + vti.RegClass:$rs2, + GPR:$vl, + vti.Log2SEW)>; def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), @@ -4024,6 +4558,16 @@ (NegImm simm5_plus1:$rs2), GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (int_riscv_vsub_tu (vti.Vector vti.RegClass:$dest), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), + VLOpFrag)), + (!cast("PseudoVADD_VI_"#vti.LMul.MX#"_TU") + vti.RegClass:$dest, + vti.RegClass:$rs1, + (NegImm simm5_plus1:$rs2), + GPR:$vl, + vti.Log2SEW)>; def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), @@ -4833,6 +5377,11 @@ VLOpFrag)), (!cast("PseudoVMV_V_V_"#vti.LMul.MX) $rs1, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (int_riscv_vmv_v_v_tu (vti.Vector vti.RegClass:$dest), + (vti.Vector vti.RegClass:$rs1), + VLOpFrag)), + (!cast("PseudoVMV_V_V_"#vti.LMul.MX#"_TU") + $dest, $rs1, GPR:$vl, vti.Log2SEW)>; // vmv.v.x/vmv.v.i are handled in RISCInstrVInstrInfoVVLPatterns.td } @@ -4973,12 +5522,12 @@ // We can use vmerge.vvm to support vector-vector vfmerge. // NOTE: Clang previously used int_riscv_vfmerge for vector-vector, but now uses // int_riscv_vmerge. Support both for compatibility. 
-defm : VPatBinaryV_VM<"int_riscv_vmerge", "PseudoVMERGE", - /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; -defm : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE", - /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; -defm : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE", - /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; +defm : VPatBinaryV_VM_WithTU<"int_riscv_vmerge", "PseudoVMERGE", + /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; +defm : VPatBinaryV_VM_WithTU<"int_riscv_vfmerge", "PseudoVMERGE", + /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; +defm : VPatBinaryV_XM_WithTU<"int_riscv_vfmerge", "PseudoVFMERGE", + /*CarryOut = */0, /*vtilist=*/AllFloatVectors>; foreach fvti = AllFloatVectors in { defvar instr = !cast("PseudoVMERGE_VIM_"#fvti.LMul.MX); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -50,10 +50,20 @@ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVT<1, XLenVT>, SDTCisVT<2, XLenVT>]>>; +def riscv_vmv_v_x_tu_vl : SDNode<"RISCVISD::VMV_V_X_TU_VL", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, XLenVT>, + SDTCisVT<3, XLenVT>]>>; def riscv_vfmv_v_f_vl : SDNode<"RISCVISD::VFMV_V_F_VL", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>, SDTCisEltOfVec<1, 0>, SDTCisVT<2, XLenVT>]>>; +def riscv_vfmv_v_f_tu_vl : SDNode<"RISCVISD::VFMV_V_F_TU_VL", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, + SDTCisSameAs<0, 1>, + SDTCisEltOfVec<2, 0>, + SDTCisVT<3, XLenVT>]>>; def riscv_vmv_s_x_vl : SDNode<"RISCVISD::VMV_S_X_VL", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, @@ -912,11 +922,18 @@ def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, VLOpFrag)), (!cast("PseudoVMV_V_X_"#vti.LMul.MX) $rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_vmv_v_x_tu_vl vti.Vector:$merge, GPR:$rs2, VLOpFrag)), + (!cast("PseudoVMV_V_X_"#vti.LMul.MX#"_TU") + $merge, $rs2, GPR:$vl, vti.Log2SEW)>; defvar ImmPat = !cast("sew"#vti.SEW#"simm5"); def : Pat<(vti.Vector (riscv_vmv_v_x_vl (ImmPat XLenVT:$imm5), VLOpFrag)), (!cast("PseudoVMV_V_I_"#vti.LMul.MX) XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector (riscv_vmv_v_x_tu_vl vti.Vector:$merge, (ImmPat XLenVT:$imm5), + VLOpFrag)), + (!cast("PseudoVMV_V_I_"#vti.LMul.MX#"_TU") + $merge, XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>; } // 12.1. Vector Single-Width Saturating Add and Subtract @@ -1158,6 +1175,10 @@ (fvti.Scalar (fpimm0)), VLOpFrag)), (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) 0, GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vfmv_v_f_tu_vl + fvti.Vector:$merge, (fvti.Scalar (fpimm0)), VLOpFrag)), + (!cast("PseudoVMV_V_I_"#fvti.LMul.MX#"_TU") + $merge, 0, GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), @@ -1165,6 +1186,12 @@ fvti.LMul.MX) (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vfmv_v_f_tu_vl + fvti.Vector:$merge, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), + (!cast("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # + fvti.LMul.MX # "_TU") + $merge, (fvti.Scalar fvti.ScalarRegClass:$rs2), + GPR:$vl, fvti.Log2SEW)>; // 14.17. 
Vector Single-Width Floating-Point/Integer Type-Convert Instructions defm : VPatConvertFP2ISDNode_V_VL; diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu-rv32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu-rv32.ll @@ -0,0 +1,735 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -target-abi ilp32f -verify-machineinstrs \ +; RUN: < %s | FileCheck %s + +declare @llvm.riscv.vadd.nxv1i8.nxv1i8( + , + , + i32); + +declare @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + , + , + , + i32); + +define @switch_tail_policy( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: switch_tail_policy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; CHECK-NEXT: vadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + %0, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + %0, + %0, + %b, + i32 %2) + + ret %c +} + +declare @llvm.riscv.vle.tu.nxv1i8( + , + *, + i32); + +define @intrinsic_vle_v_tu_nxv1i8_nxv1i8( %0, * %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vle_v_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.tu.nxv1i8( + %0, + * %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vlse.tu.nxv1i8( + , + *, + i32, + i32); + +define @intrinsic_vlse_v_tu_nxv1i8_nxv1i8( %0, * %1, i32 %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, mu +; CHECK-NEXT: vlse8.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.tu.nxv1i8( + %0, + * %1, + i32 %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vluxei.tu.nxv1i8.nxv1i64( + , + *, + , + i32); + +define @intrinsic_vluxei_v_tu_nxv1i8_nxv1i8_nxv1i64( %0, * %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_tu_nxv1i8_nxv1i8_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vluxei64.v v8, (a0), v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.tu.nxv1i8.nxv1i64( + %0, + * %1, + %2, + i32 %3) + + ret %a +} + +define @intrinsic_vadd_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vadd_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + %0, + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vwaddu.tu.nxv1i16.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vwaddu_vv_tu_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwaddu_vv_tu_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vwaddu.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.tu.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwaddu.w.tu.nxv1i16.nxv1i8( + , + , + , + i32); + +define 
@intrinsic_vwaddu.w_wv_tu_nxv1i16_nxv1i16_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_tu_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vwaddu.wv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.tu.nxv1i16.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vzext.tu.nxv1i32.nxv1i8( + , + , + i32); + +define @intrinsic_vzext_vf8_tu_nxv1i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vzext_vf8_tu_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vzext.vf4 v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vzext.tu.nxv1i32.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vadc.tu.nxv1i8.nxv1i8( + , + , + , + , + i32); + +define @intrinsic_vadc_vvm_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vadc_vvm_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vadc.vvm v8, v9, v10, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadc.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vsll.tu.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vsll_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vsll_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vsll.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vsll.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnsrl.tu.nxv1i8.nxv1i16.nxv1i8( + , + , + , + i32); + +define @intrinsic_vnsrl_wv_tu_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vnsrl_wv_tu_nxv1i8_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vnsrl.wv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vnsrl.tu.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.tu.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vmacc_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vmacc_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmacc.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmacc.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.tu.nxv1i16.nxv1i8( + , + , + , + i32); + +define @intrinsic_vwmacc_vv_tu_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_tu_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vwmacc.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwmacc.tu.nxv1i16.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vsaddu.tu.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vsaddu_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vsaddu_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vsaddu.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vsaddu.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.tu.nxv1i8.nxv1i8( + , + , + , + , + 
i32); + +define @intrinsic_vmerge_vvm_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vmerge_vvm_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmerge.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vmv.v.v.tu.nxv1i8( + , + , + i32); + +define @intrinsic_vmv.v.v_v_tu_nxv1i8_nxv1i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.v_v_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmv.v.v.tu.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmv.v.x.tu.nxv1i8( + , + i8, + i32); + +define @intrinsic_vmv.v.x_x_tu_nxv1i8( %0, i8 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.x_x_tu_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmv.v.x.tu.nxv1i8( + %0, + i8 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmv.v.x.tu.nxv1i64( + , + i64, + i32); + +define @intrinsic_vmv.v.x_x_tu_nxv1i64( %0, i64 %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.x_x_tu_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 8(sp) +; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vlse64.v v8, (a0), zero +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmv.v.x.tu.nxv1i64( + %0, + i64 %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfmv.v.f.tu.nxv1f16( + , + half, + i32); + +define @intrinsic_vfmv.v.f_f_tu_nxv1f16( %0, half %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_tu_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfmv.v.f.tu.nxv1f16( + %0, + half %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfsqrt.tu.nxv1f16( + , + , + i32); + +define @intrinsic_vfsqrt_v_tu_nxv1f16_nxv1f16( +; CHECK-LABEL: intrinsic_vfsqrt_v_tu_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfsqrt.v v8, v9 +; CHECK-NEXT: ret + %0, + %1, + i32 %2) nounwind { +entry: + %a = call @llvm.riscv.vfsqrt.tu.nxv1f16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vfclass.tu.nxv1i16( + , + , + i32); + +define @intrinsic_vfclass_v_tu_nxv1i16_nxv1f16( +; CHECK-LABEL: intrinsic_vfclass_v_tu_nxv1i16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfclass.v v8, v9 +; CHECK-NEXT: ret + %0, + %1, + i32 %2) nounwind { +entry: + %a = call @llvm.riscv.vfclass.tu.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vslideup.tu.nxv1i8( + , + , + i32, + i32); + +define @intrinsic_vslideup_vx_tu_nxv1i8_nxv1i8( %0, %1, i32 %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vslideup_vx_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslideup.tu.nxv1i8( + %0, + %1, + i32 %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vslide1up.tu.nxv1i8.i8( + , + , + i8, + i32); + +define 
@intrinsic_vslide1up_vx_tu_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_tu_nxv1i8_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.tu.nxv1i8.i8( + %0, + %1, + i8 %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vrgather.vv.tu.nxv1i8.i32( + , + , + , + i32); + +define @intrinsic_vrgather_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vrgather_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vrgather.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vrgather.vv.tu.nxv1i8.i32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vrgatherei16.vv.tu.nxv1i8( + , + , + , + i32); + +define @intrinsic_vrgatherei16_vv_tu_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vrgatherei16_vv_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vrgatherei16.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vrgatherei16.vv.tu.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vcompress.tu.nxv1i8( + , + , + , + i32); + +define @intrinsic_vcompress_vm_tu_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vcompress.tu.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vssrl.tu.nxv1i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vssrl_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vssrl_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vssrl.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vssrl.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vnclipu.tu.nxv1i8.nxv1i16.nxv1i8( + , + , + , + i32); + +define @intrinsic_vnclipu_wv_tu_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vnclipu_wv_tu_nxv1i8_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vnclipu.wv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vnclipu.tu.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vredsum.tu.nxv8i8.nxv1i8( + , + , + , + i32); + +define @intrinsic_vredsum_vs_tu_nxv8i8_nxv1i8_nxv8i8( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vredsum_vs_tu_nxv8i8_nxv1i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vredsum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vredsum.tu.nxv8i8.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vwredsumu.tu.nxv4i16.nxv1i8( + , + , + , + i32); + +define @intrinsic_vwredsumu_vs_tu_nxv4i16_nxv1i8_nxv4i16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vwredsumu_vs_tu_nxv4i16_nxv1i8_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vwredsumu.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwredsumu.tu.nxv4i16.nxv1i8( + %0, + %1, + %2, + i32 %3) + + ret %a 
+} + +declare @llvm.riscv.vfcvt.f.x.v.tu.nxv1f16.nxv1i16( + , + , + i32); + +define @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfcvt.f.x.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.f.x.v.tu.nxv1f16.nxv1i16( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.viota.tu.nxv1i8( + , + , + i32); + +define @intrinsic_viota_m_tu_nxv1i8_nxv1i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_viota_m_tu_nxv1i8_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: viota.m v8, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.viota.tu.nxv1i8( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vid.tu.nxv1i8( + , + i32); + +define @intrinsic_vid_v_tu_nxv1i8( %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vid_v_tu_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vid.tu.nxv1i8( + %0, + i32 %1) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu-rv64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu-rv64.ll @@ -0,0 +1,710 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -target-abi lp64d -verify-machineinstrs \ +; RUN: < %s | FileCheck %s + +declare @llvm.riscv.vadd.nxv1i8.nxv1i8( + , + , + i64); + +declare @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + , + , + , + i64); + +define @switch_tail_policy( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: switch_tail_policy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; CHECK-NEXT: vadd.vv v9, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + %0, + %0, + %1, + i64 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %a, + i64 %2) + + %c = call @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + %0, + %0, + %b, + i64 %2) + + ret %c +} + +declare @llvm.riscv.vle.tu.nxv1i8( + , + *, + i64); + +define @intrinsic_vle_v_tu_nxv1i8_nxv1i8( %0, * %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vle_v_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.tu.nxv1i8( + %0, + * %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vlse.tu.nxv1i8( + , + *, + i64, + i64); + +define @intrinsic_vlse_v_tu_nxv1i8_nxv1i8( %0, * %1, i64 %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vlse_v_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, mu +; CHECK-NEXT: vlse8.v v8, (a0), a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vlse.tu.nxv1i8( + %0, + * %1, + i64 %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vluxei.tu.nxv1i8.nxv1i64( + , + *, + , + i64); + +define @intrinsic_vluxei_v_tu_nxv1i8_nxv1i8_nxv1i64( %0, * %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vluxei_v_tu_nxv1i8_nxv1i8_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vluxei64.v 
v8, (a0), v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vluxei.tu.nxv1i8.nxv1i64( + %0, + * %1, + %2, + i64 %3) + + ret %a +} + +define @intrinsic_vadd_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vadd_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.tu.nxv1i8.nxv1i8( + %0, + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vwaddu.tu.nxv1i16.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vwaddu_vv_tu_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwaddu_vv_tu_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vwaddu.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.tu.nxv1i16.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwaddu.w.tu.nxv1i16.nxv1i8( + , + , + , + i64); + +define @intrinsic_vwaddu.w_wv_tu_nxv1i16_nxv1i16_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_tu_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vwaddu.wv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.tu.nxv1i16.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vzext.tu.nxv1i64.nxv1i8( + , + , + i64); + +define @intrinsic_vzext_vf8_tu_nxv1i64( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vzext_vf8_tu_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vzext.vf8 v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vzext.tu.nxv1i64.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vadc.tu.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vadc_vvm_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vadc_vvm_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vadc.vvm v8, v9, v10, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadc.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vsll.tu.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vsll_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsll_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vsll.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vsll.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnsrl.tu.nxv1i8.nxv1i16.nxv1i8( + , + , + , + i64); + +define @intrinsic_vnsrl_wv_tu_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vnsrl_wv_tu_nxv1i8_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vnsrl.wv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vnsrl.tu.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmacc.tu.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vmacc_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vmacc_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmacc.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vmacc.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vwmacc.tu.nxv1i16.nxv1i8( + , + , + , + i64); + +define @intrinsic_vwmacc_vv_tu_nxv1i16_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vwmacc_vv_tu_nxv1i16_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vwmacc.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwmacc.tu.nxv1i16.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vsaddu.tu.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vsaddu_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vsaddu_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vsaddu.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vsaddu.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vmerge.tu.nxv1i8.nxv1i8( + , + , + , + , + i64); + +define @intrinsic_vmerge_vvm_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vmerge_vvm_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmerge.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vmv.v.v.tu.nxv1i8( + , + , + i64); + +define @intrinsic_vmv.v.v_v_tu_nxv1i8_nxv1i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.v_v_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmv.v.v.tu.nxv1i8( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmv.v.x.tu.nxv1i8( + , + i8, + i64); + +define @intrinsic_vmv.v.x_x_tu_nxv1i8( %0, i8 %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmv.v.x_x_tu_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmv.v.x.tu.nxv1i8( + %0, + i8 %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfmv.v.f.tu.nxv1f16( + , + half, + i64); + +define @intrinsic_vfmv.v.f_f_tu_nxv1f16( %0, half %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfmv.v.f_f_tu_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfmv.v.f.tu.nxv1f16( + %0, + half %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfsqrt.tu.nxv1f16( + , + , + i64); + +define @intrinsic_vfsqrt_v_tu_nxv1f16_nxv1f16( +; CHECK-LABEL: intrinsic_vfsqrt_v_tu_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfsqrt.v v8, v9 +; CHECK-NEXT: ret + %0, + %1, + i64 %2) nounwind { +entry: + %a = call @llvm.riscv.vfsqrt.tu.nxv1f16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vfclass.tu.nxv1i16( + , + , + i64); + +define @intrinsic_vfclass_v_tu_nxv1i16_nxv1f16( +; CHECK-LABEL: intrinsic_vfclass_v_tu_nxv1i16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfclass.v v8, v9 +; CHECK-NEXT: ret + %0, + %1, + i64 %2) nounwind { +entry: + %a = call @llvm.riscv.vfclass.tu.nxv1i16( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vslideup.tu.nxv1i8( + , + , + i64, + i64); + 
+define @intrinsic_vslideup_vx_tu_nxv1i8_nxv1i8( %0, %1, i64 %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vslideup_vx_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslideup.tu.nxv1i8( + %0, + %1, + i64 %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vslide1up.tu.nxv1i8.i8( + , + , + i8, + i64); + +define @intrinsic_vslide1up_vx_tu_nxv1i8_nxv1i8_i8( %0, %1, i8 %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_tu_nxv1i8_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.tu.nxv1i8.i8( + %0, + %1, + i8 %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vrgather.vv.tu.nxv1i8.i64( + , + , + , + i64); + +define @intrinsic_vrgather_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vrgather_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vrgather.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vrgather.vv.tu.nxv1i8.i64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vrgatherei16.vv.tu.nxv1i8( + , + , + , + i64); + +define @intrinsic_vrgatherei16_vv_tu_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vrgatherei16_vv_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vrgatherei16.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vrgatherei16.vv.tu.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vcompress.tu.nxv1i8( + , + , + , + i64); + +define @intrinsic_vcompress_vm_tu_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_vm_tu_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vcompress.tu.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vssrl.tu.nxv1i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vssrl_vv_tu_nxv1i8_nxv1i8_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vssrl_vv_tu_nxv1i8_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vssrl.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vssrl.tu.nxv1i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vnclipu.tu.nxv1i8.nxv1i16.nxv1i8( + , + , + , + i64); + +define @intrinsic_vnclipu_wv_tu_nxv1i8_nxv1i16_nxv1i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vnclipu_wv_tu_nxv1i8_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vnclipu.wv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vnclipu.tu.nxv1i8.nxv1i16.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vredsum.tu.nxv8i8.nxv1i8( + , + , + , + i64); + +define @intrinsic_vredsum_vs_tu_nxv8i8_nxv1i8_nxv8i8( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vredsum_vs_tu_nxv8i8_nxv1i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vredsum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vredsum.tu.nxv8i8.nxv1i8( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare 
<vscale x 4 x i16> @llvm.riscv.vwredsumu.tu.nxv4i16.nxv1i8(
+  <vscale x 4 x i16>,
+  <vscale x 1 x i8>,
+  <vscale x 4 x i16>,
+  i64);
+
+define <vscale x 4 x i16> @intrinsic_vwredsumu_vs_tu_nxv4i16_nxv1i8_nxv4i16(<vscale x 4 x i16> %0, <vscale x 1 x i8> %1, <vscale x 4 x i16> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vwredsumu_vs_tu_nxv4i16_nxv1i8_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vwredsumu.vs v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i16> @llvm.riscv.vwredsumu.tu.nxv4i16.nxv1i8(
+    <vscale x 4 x i16> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 4 x i16> %2,
+    i64 %3)
+
+  ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.tu.nxv1f16.nxv1i16(
+  <vscale x 1 x half>,
+  <vscale x 1 x i16>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x half> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfcvt.f.x.v v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.tu.nxv1f16.nxv1i16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x i16> %1,
+    i64 %2)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.viota.tu.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x i8> @intrinsic_viota_m_tu_nxv1i8_nxv1i1(<vscale x 1 x i8> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_viota_m_tu_nxv1i8_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    viota.m v8, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.viota.tu.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i1> %1,
+    i64 %2)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vid.tu.nxv1i8(
+  <vscale x 1 x i8>,
+  i64);
+
+define <vscale x 1 x i8> @intrinsic_vid_v_tu_nxv1i8(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vid_v_tu_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vid.tu.nxv1i8(
+    <vscale x 1 x i8> %0,
+    i64 %1)
+
+  ret <vscale x 1 x i8> %a
+}
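
; ---------------------------------------------------------------------------
; Minimal usage sketch, not part of the patch and not exercised by any RUN
; line above: a hand-written rv64 caller that threads an accumulator through
; the new ".tu" intrinsics. The intrinsic names and operand orders follow the
; declarations in the rv64 tests above -- (undisturbed, pointer, vl) for vle
; and (undisturbed, lhs, rhs, vl) for vadd -- with the scalable vector types
; written out explicitly; the function name @acc_step_tu and the value names
; %acc, %p, %vl are made up for this example only. Passing %acc as the
; undisturbed operand means elements past %vl keep their previous values, so
; selection is expected to pick the "_TU" pseudos under a "tu" vsetvli.

declare <vscale x 1 x i8> @llvm.riscv.vle.tu.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>*,
  i64);

declare <vscale x 1 x i8> @llvm.riscv.vadd.tu.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  <vscale x 1 x i8>,
  i64);

define <vscale x 1 x i8> @acc_step_tu(<vscale x 1 x i8> %acc, <vscale x 1 x i8>* %p, i64 %vl) nounwind {
entry:
  ; Load the next chunk; tail elements of %acc are left undisturbed.
  %v = call <vscale x 1 x i8> @llvm.riscv.vle.tu.nxv1i8(
    <vscale x 1 x i8> %acc,
    <vscale x 1 x i8>* %p,
    i64 %vl)

  ; Accumulate in place; %acc is both the undisturbed operand and an addend,
  ; mirroring the intrinsic_vadd_vv_tu test above.
  %sum = call <vscale x 1 x i8> @llvm.riscv.vadd.tu.nxv1i8.nxv1i8(
    <vscale x 1 x i8> %acc,
    <vscale x 1 x i8> %acc,
    <vscale x 1 x i8> %v,
    i64 %vl)

  ret <vscale x 1 x i8> %sum
}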