diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -123,6 +123,8 @@ return ST->useRVVForFixedLengthVectors() ? 16 : 0; } + InstructionCost getVRGatherVVCost(MVT VT); + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -263,6 +263,12 @@ return cast(EVT(IndexVT).getTypeForEVT(C)); } +/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv +/// is generally quadratic in the number of vreg implied by LMUL. Note that +/// operand (index and possibly mask) are handled separately. +InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) { + return getLMULCost(VT) * getLMULCost(VT); +} InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, @@ -311,7 +317,7 @@ LT.second.getVectorNumElements() <= 256)) { VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext()); InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); - return IndexCost + getLMULCost(LT.second); + return IndexCost + getVRGatherVVCost(LT.second); } } break; @@ -331,7 +337,7 @@ VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC); InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); - return 2 * IndexCost + 2 * getLMULCost(LT.second) + MaskCost; + return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost; } } break; @@ -407,11 +413,11 @@ return 2 * LT.first * getLMULCost(LT.second); case TTI::SK_Reverse: { // TODO: Cases to improve here: - // * LMUL > 1 + // * Illegal vector types // * i64 on RV32 // * i1 vector - - // Most of the cost here is producing the vrgather index register + // At low LMUL, most of the cost is producing the vrgather index register. + // At high LMUL, the cost of the vrgather itself will dominate. // Example sequence: // csrr a0, vlenb // srli a0, a0, 3 @@ -420,14 +426,14 @@ // vid.v v9 // vrsub.vx v10, v9, a0 // vrgather.vv v9, v8, v10 - unsigned LenCost = 3; + InstructionCost LenCost = 3; if (LT.second.isFixedLengthVector()) // vrsub.vi has a 5 bit immediate field, otherwise an li suffices LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1; - if (Tp->getElementType()->isIntegerTy(1)) - // Mask operation additionally required extend and truncate - return LT.first * (LenCost + 6); - return LT.first * (LenCost + 3); + InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second); + // Mask operation additionally required extend and truncate + InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0; + return LT.first * (LenCost + GatherCost + ExtendCost); } } return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll --- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll @@ -55,17 +55,20 @@ define void @vector_reverse() { ; CHECK-LABEL: 'vector_reverse' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv2i16 = call @llvm.experimental.vector.reverse.nxv2i16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i16 = call @llvm.experimental.vector.reverse.nxv4i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i32 = call @llvm.experimental.vector.reverse.nxv4i32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i32 = call @llvm.experimental.vector.reverse.nxv4i32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %reverse_nxv8i64 = call @llvm.experimental.vector.reverse.nxv8i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %reverse_nxv16i64 = call @llvm.experimental.vector.reverse.nxv16i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %reverse_nxv32i64 = call @llvm.experimental.vector.reverse.nxv32i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i1 = call @llvm.experimental.vector.reverse.nxv4i1( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv2i1 = call @llvm.experimental.vector.reverse.nxv2i1( undef) @@ -81,6 +84,9 @@ %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) + %reverse_nxv8i64 = call @llvm.experimental.vector.reverse.nxv8i64( undef) + %reverse_nxv16i64 = call @llvm.experimental.vector.reverse.nxv16i64( undef) + %reverse_nxv32i64 = call @llvm.experimental.vector.reverse.nxv32i64( undef) %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) %reverse_nxv4i1 = call @llvm.experimental.vector.reverse.nxv4i1( undef) @@ -98,6 +104,9 @@ declare @llvm.experimental.vector.reverse.nxv8i32() declare @llvm.experimental.vector.reverse.nxv2i64() declare @llvm.experimental.vector.reverse.nxv4i64() +declare @llvm.experimental.vector.reverse.nxv8i64() +declare @llvm.experimental.vector.reverse.nxv16i64() +declare @llvm.experimental.vector.reverse.nxv32i64() declare @llvm.experimental.vector.reverse.nxv16i1() declare @llvm.experimental.vector.reverse.nxv8i1() declare @llvm.experimental.vector.reverse.nxv4i1() diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll @@ -40,12 +40,12 @@ define <8 x i64> @interleave2_v8i64(<4 x i64> %v0, <4 x i64> %v1) { ; RV32-LABEL: 'interleave2_v8i64' ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res ; ; RV64-LABEL: 'interleave2_v8i64' ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = shufflevector <8 x i64> %concat, <8 x i64> poison, <8 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res ; %concat = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll @@ -12,16 +12,16 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> @@ -59,27 +59,27 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2half = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4half = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8half = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16half = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2float = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4float = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8float = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16float = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2double = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4double = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v8double = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %v16double = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll @@ -15,21 +15,21 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -78,39 +78,11 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-LABEL: @load_store_factor2_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP6]], i32 -1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] +; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]] ; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 ; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 4 @@ -121,7 +93,7 @@ ; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -179,7 +151,7 @@ ; CHECK-NEXT: store <6 x i32> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -205,7 +177,7 @@ ; CHECK-NEXT: store i32 [[Y2]], ptr [[Q2]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -440,7 +412,7 @@ ; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -460,7 +432,7 @@ ; CHECK-NEXT: store i32 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -510,7 +482,7 @@ ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -530,7 +502,7 @@ ; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -36,10 +36,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -51,23 +51,27 @@ ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: vp<%1> = original trip-count +; CHECK: ph: +; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: No successors ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> +; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%4>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%add9> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%2> +; CHECK-NEXT: EMIT branch-on-count vp<%11> vp<%0> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -79,10 +83,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -105,7 +109,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class -; CHECK-NEXT: LV: Loop cost is 25 +; CHECK-NEXT: LV: Loop cost is 31 ; CHECK-NEXT: LV: IC is 1 ; CHECK-NEXT: LV: VF is vscale x 4 ; CHECK-NEXT: LV: Not Interleaving. @@ -168,10 +172,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -183,23 +187,27 @@ ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: vp<%1> = original trip-count +; CHECK: ph: +; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: No successors ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> +; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%n> + vp<%2> * ir<-1> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%4>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%conv1> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%2> +; CHECK-NEXT: EMIT branch-on-count vp<%11> vp<%0> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -211,10 +219,10 @@ ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom -; CHECK-NEXT: LV: Found an estimated cost of 8 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 +; CHECK-NEXT: LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 @@ -237,7 +245,7 @@ ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class -; CHECK-NEXT: LV: Loop cost is 25 +; CHECK-NEXT: LV: Loop cost is 31 ; CHECK-NEXT: LV: IC is 1 ; CHECK-NEXT: LV: VF is vscale x 4 ; CHECK-NEXT: LV: Not Interleaving.