Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1594,9 +1594,12 @@
     if (!LT.second.isVector())
       return 0;
 
-    // The type may be split. Normalize the index to the new type.
-    unsigned Width = LT.second.getVectorNumElements();
-    Index = Index % Width;
+    // The type may be split. For fixed-width vectors we can normalize the
+    // index to the new type.
+    if (LT.second.isFixedLengthVector()) {
+      unsigned Width = LT.second.getVectorNumElements();
+      Index = Index % Width;
+    }
 
     // The element at index zero is already inside the vector.
     if (Index == 0)
Index: llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -cost-model -analyze -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+
+define void @ins_el0() #0 {
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 0
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 0
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 0
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 0
+  %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 0
+  %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 0
+  %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 0
+  %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 0
+  ret void
+}
+
+define void @ins_el1() #0 {
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 1
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 1
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 1
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 1
+  %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 1
+  %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 1
+  %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 1
+  %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 1
+  ret void
+}
+
+
+define void @ext_el0() #0 {
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 0
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 0
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 0
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 0
+  %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 0
+  %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 0
+  %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 0
+  %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 0
+  ret void
+}
+
+define void @ext_el1() #0 {
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 1
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 1
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 1
+; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 1
+  %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 1
+  %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 1
+  %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 1
+  %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 1
+  ret void
+}
+
+
+attributes #0 = { "target-features"="+sve" vscale_range(1, 16) }
Index: llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -3,8 +3,8 @@
 
 define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) {
 ; CHECK-LABEL: 'vector_insert_extract'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void