Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -123,6 +123,8 @@ return ST->useRVVForFixedLengthVectors() ? 16 : 0; } + InstructionCost getVRGatherVVCost(MVT VT); + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -259,6 +259,12 @@ return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C)); } +/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv +/// is generally quadratic in the number of vregs implied by LMUL. Note that +/// operands (index and possibly mask) are handled separately. +InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) { + return getLMULCost(VT) * getLMULCost(VT); +} InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, @@ -307,7 +313,7 @@ LT.second.getVectorNumElements() <= 256)) { VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext()); InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); - return IndexCost + getLMULCost(LT.second); + return IndexCost + getVRGatherVVCost(LT.second); } } break; @@ -327,7 +333,7 @@ VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC); InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); - return 2 * IndexCost + 2 * getLMULCost(LT.second) + MaskCost; + return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost; } } break; @@ -403,11 +409,11 @@ return 2 * LT.first * getLMULCost(LT.second); case TTI::SK_Reverse: { // TODO: 
Cases to improve here: - // * LMUL > 1 + // * Illegal vector types // * i64 on RV32 // * i1 vector - - // Most of the cost here is producing the vrgather index register + // At low LMUL, most of the cost is producing the vrgather index register. + // At high LMUL, the cost of the vrgather itself will dominate. // Example sequence: // csrr a0, vlenb // srli a0, a0, 3 @@ -416,14 +422,14 @@ // vid.v v9 // vrsub.vx v10, v9, a0 // vrgather.vv v9, v8, v10 - unsigned LenCost = 3; + InstructionCost LenCost = 3; if (LT.second.isFixedLengthVector()) // vrsub.vi has a 5 bit immediate field, otherwise an li suffices LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1; - if (Tp->getElementType()->isIntegerTy(1)) - // Mask operation additionally required extend and truncate - return LT.first * (LenCost + 6); - return LT.first * (LenCost + 3); + InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second); + // Mask operation additionally requires extend and truncate + InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 
3 : 0; + return LT.first * (LenCost + GatherCost + ExtendCost); } } return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); Index: llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll =================================================================== --- llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll +++ llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll @@ -55,17 +55,20 @@ define void @vector_reverse() { ; CHECK-LABEL: 'vector_reverse' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv2i16 = call @llvm.experimental.vector.reverse.nxv2i16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i16 = call @llvm.experimental.vector.reverse.nxv4i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i32 = call @llvm.experimental.vector.reverse.nxv4i32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i32 = call @llvm.experimental.vector.reverse.nxv4i32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %reverse_nxv8i64 = call @llvm.experimental.vector.reverse.nxv8i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %reverse_nxv16i64 = call @llvm.experimental.vector.reverse.nxv16i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %reverse_nxv32i64 = call @llvm.experimental.vector.reverse.nxv32i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i1 = call @llvm.experimental.vector.reverse.nxv4i1( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv2i1 = call @llvm.experimental.vector.reverse.nxv2i1( undef) @@ -81,6 +84,9 @@ %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) + %reverse_nxv8i64 = call @llvm.experimental.vector.reverse.nxv8i64( undef) + %reverse_nxv16i64 = call @llvm.experimental.vector.reverse.nxv16i64( undef) + %reverse_nxv32i64 = call @llvm.experimental.vector.reverse.nxv32i64( undef) %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) %reverse_nxv4i1 = call @llvm.experimental.vector.reverse.nxv4i1( undef) @@ -98,6 +104,9 @@ declare @llvm.experimental.vector.reverse.nxv8i32() declare @llvm.experimental.vector.reverse.nxv2i64() declare @llvm.experimental.vector.reverse.nxv4i64() +declare @llvm.experimental.vector.reverse.nxv8i64() +declare @llvm.experimental.vector.reverse.nxv16i64() +declare @llvm.experimental.vector.reverse.nxv32i64() declare @llvm.experimental.vector.reverse.nxv16i1() declare @llvm.experimental.vector.reverse.nxv8i1() declare @llvm.experimental.vector.reverse.nxv4i1() Index: llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll =================================================================== --- llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll +++ llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll @@ -40,12 +40,12 @@ define <8 x i64> @interleave2_v8i64(<4 x i64> %v0, <4 x i64> %v1) { ; RV32-LABEL: 'interleave2_v8i64' ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> -; RV32-NEXT: Cost 
Model: Found an estimated cost of 7 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res ; ; RV64-LABEL: 'interleave2_v8i64' ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res ; %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> Index: llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll =================================================================== --- llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll +++ llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll @@ -12,16 +12,16 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void ; %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> @@ -59,27 +59,27 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 17 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2half = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4half = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8half = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2float = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4float = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> +; CHECK-NEXT: 
Cost Model: Found an estimated cost of 19 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2double = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> Index: llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll =================================================================== --- llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll +++ llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll @@ -15,21 +15,21 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <4 x i16> 
undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 
x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> Index: llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -36,10 +36,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: 
Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -51,23 +51,27 @@ ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: vp<%1> = original trip-count +; CHECK: ph: +; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: No successors ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, 
ir<-1> +; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%4>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%add9> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%2> +; CHECK-NEXT: EMIT branch-on-count vp<%11> vp<%0> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -79,10 +83,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For 
instruction: store i32 %add9, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -105,7 +109,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class -; CHECK-NEXT: LV: Loop cost is 25 +; CHECK-NEXT: LV: Loop cost is 31 ; CHECK-NEXT: LV: IC is 1 ; CHECK-NEXT: LV: VF is vscale x 4 ; CHECK-NEXT: LV: Not Interleaving. @@ -168,10 +172,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store float 
%conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -183,23 +187,27 @@ ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: vp<%1> = original trip-count +; CHECK: ph: +; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: No successors ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> +; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%4>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%conv1> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> -; CHECK-NEXT: 
EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%2> +; CHECK-NEXT: EMIT branch-on-count vp<%11> vp<%0> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -211,10 +219,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -237,7 +245,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target 
has 32 registers of RISCV::VRRC register class -; CHECK-NEXT: LV: Loop cost is 25 +; CHECK-NEXT: LV: Loop cost is 31 ; CHECK-NEXT: LV: IC is 1 ; CHECK-NEXT: LV: VF is vscale x 4 ; CHECK-NEXT: LV: Not Interleaving.