Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2095,6 +2095,18 @@ ArrayRef Mask, int Index, VectorType *SubTp) { Kind = improveShuffleKindFromMask(Kind, Mask); + auto LT = TLI->getTypeLegalizationCost(DL, Tp); + + // Subvector insertions: vector concats are cheap. They are a zip or a mov, + // see also test/CodeGen/AArch64/concat-vector.ll. + if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) { + unsigned NumElts = LT.second.getVectorNumElements(); + auto SubLT = TLI->getTypeLegalizationCost(DL, SubTp); + if (SubLT.second.isVector() && + (Index + SubLT.second.getVectorNumElements()) == NumElts) + return SubLT.first; + } + if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_Reverse) { Index: llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll +++ llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -36,19 +36,19 @@ define void @concat() { ; CHECK-LABEL: 'concat' -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void Index: llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -3,10 +3,12 @@ define void @vector_insert_extract( %v0, %v1, <16 x i32> %v2) { ; CHECK-LABEL: 'vector_insert_extract' -; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32( %v0, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %insert_fixed_into_scalable = call @llvm.experimental.vector.insert.nxv4i32.v16i32( %v0, <16 x i32> %v2, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call @llvm.experimental.vector.extract.nxv4i32.nxv16i32( %v1, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call @llvm.experimental.vector.insert.nxv16i32.nxv4i32( %v1, %v0, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32( %v0, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %insert_fixed_into_scalable = call @llvm.experimental.vector.insert.nxv4i32.v16i32( %v0, <16 x i32> %v2, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call @llvm.experimental.vector.extract.nxv4i32.nxv16i32( %v1, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call @llvm.experimental.vector.insert.nxv16i32.nxv4i32( %v1, %v0, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32( %v0, i64 0) %insert_fixed_into_scalable = call @llvm.experimental.vector.insert.nxv4i32.v16i32( %v0, <16 x i32> %v2, i64 0) %extract_scalable_from_scalable = call @llvm.experimental.vector.extract.nxv4i32.nxv16i32( %v1, i64 0) @@ -21,24 +23,32 @@ define void @reductions( %v0, %v1, %v2, %v3) { ; CHECK-LABEL: 'reductions' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, %v2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v3) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32( %v2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64( %v3) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32( %v2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64( %v3) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32( %v0) %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64( %v1) %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) @@ -58,12 +68,6 @@ %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32( %v0) %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, %v2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v3) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32( %v2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64( %v3) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32( %v2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64( %v3) %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, %v2) %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, %v3) %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32( %v2) @@ -76,10 +80,12 @@ define void @strict_fp_reductions( %v0, %v1) { ; CHECK-LABEL: 'strict_fp_reductions' -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v1) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f32 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, %v0) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f64 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, %v0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f32 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, %v0) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmul_nxv4f64 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, %v1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, %v0) %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, %v1) %fmul_nxv4f32 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, %v0) @@ -118,8 +124,10 @@ define void @count_zeroes( %A) { ; CHECK-LABEL: 'count_zeroes' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call @llvm.ctlz.nxv4i32( %A, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call @llvm.cttz.nxv4i32( %A, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call @llvm.ctlz.nxv4i32( %A, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call @llvm.cttz.nxv4i32( %A, i1 true) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %ctlz = call @llvm.ctlz.nxv4i32( %A, i1 true) %cttz = call @llvm.cttz.nxv4i32( %A, i1 true) ret void @@ -129,34 +137,36 @@ define void @vector_reverse() #0 { -; CHECK-LABEL: 'vector_reverse': -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i16 = call @llvm.experimental.vector.reverse.nxv2i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i16 = call @llvm.experimental.vector.reverse.nxv4i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i32 = call @llvm.experimental.vector.reverse.nxv4i32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f16 = call @llvm.experimental.vector.reverse.nxv2f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f16 = call @llvm.experimental.vector.reverse.nxv4f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8f16 = call @llvm.experimental.vector.reverse.nxv8f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16f16 = call @llvm.experimental.vector.reverse.nxv16f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f32 = call @llvm.experimental.vector.reverse.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f32 = call @llvm.experimental.vector.reverse.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8f32 = call @llvm.experimental.vector.reverse.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f64 = call @llvm.experimental.vector.reverse.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4f64 = call @llvm.experimental.vector.reverse.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2bf16 = call @llvm.experimental.vector.reverse.nxv2bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4bf16 = call @llvm.experimental.vector.reverse.nxv4bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8bf16 = call @llvm.experimental.vector.reverse.nxv8bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16bf16 = call @llvm.experimental.vector.reverse.nxv16bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i1 = call @llvm.experimental.vector.reverse.nxv4i1( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i1 = call @llvm.experimental.vector.reverse.nxv2i1( undef) +; CHECK-LABEL: 'vector_reverse' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i16 = call @llvm.experimental.vector.reverse.nxv2i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i16 = call @llvm.experimental.vector.reverse.nxv4i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i16 = call @llvm.experimental.vector.reverse.nxv8i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16i16 = call @llvm.experimental.vector.reverse.nxv16i16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i32 = call @llvm.experimental.vector.reverse.nxv4i32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8i32 = call @llvm.experimental.vector.reverse.nxv8i32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i64 = call @llvm.experimental.vector.reverse.nxv2i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4i64 = call @llvm.experimental.vector.reverse.nxv4i64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f16 = call @llvm.experimental.vector.reverse.nxv2f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f16 = call @llvm.experimental.vector.reverse.nxv4f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8f16 = call @llvm.experimental.vector.reverse.nxv8f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16f16 = call @llvm.experimental.vector.reverse.nxv16f16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f32 = call @llvm.experimental.vector.reverse.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f32 = call @llvm.experimental.vector.reverse.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8f32 = call @llvm.experimental.vector.reverse.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f64 = call @llvm.experimental.vector.reverse.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4f64 = call @llvm.experimental.vector.reverse.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2bf16 = call @llvm.experimental.vector.reverse.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4bf16 = call @llvm.experimental.vector.reverse.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8bf16 = call @llvm.experimental.vector.reverse.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16bf16 = call @llvm.experimental.vector.reverse.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i1 = call @llvm.experimental.vector.reverse.nxv16i1( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i1 = call @llvm.experimental.vector.reverse.nxv8i1( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i1 = call @llvm.experimental.vector.reverse.nxv4i1( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i1 = call @llvm.experimental.vector.reverse.nxv2i1( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %reverse_nxv16i8 = call @llvm.experimental.vector.reverse.nxv16i8( undef) %reverse_nxv32i8 = call @llvm.experimental.vector.reverse.nxv32i8( undef) @@ -217,15 +227,17 @@ define void @unsupported_fp_ops( %vec) { ; CHECK-LABEL: 'unsupported_fp_ops' -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %sin = call @llvm.sin.nxv4f32( %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %cos = call @llvm.cos.nxv4f32( %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %pow = call @llvm.pow.nxv4f32( %vec, %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp = call @llvm.exp.nxv4f32( %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp2 = call @llvm.exp2.nxv4f32( %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log = call @llvm.log.nxv4f32( %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log2 = call @llvm.log2.nxv4f32( %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log10 = call @llvm.log10.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %sin = call @llvm.sin.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %cos = call @llvm.cos.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %pow = call @llvm.pow.nxv4f32( %vec, %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp = call @llvm.exp.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp2 = call @llvm.exp2.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log = call @llvm.log.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log2 = call @llvm.log2.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log10 = call @llvm.log10.nxv4f32( %vec) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %sin = call @llvm.sin.nxv4f32( %vec) %cos = call @llvm.cos.nxv4f32( %vec) @@ -305,6 +317,8 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv8i1_neg = call @llvm.experimental.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv4i1_neg = call @llvm.experimental.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv2i1_neg = call @llvm.experimental.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %splice_nxv16i8 = call @llvm.experimental.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 1) %splice_nxv32i8 = call @llvm.experimental.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 1)