Summary (free text ahead of the first `Index:` header; not part of any hunk):
  * AArch64TargetTransformInfo.cpp: when operand 1 of the costed instruction
    is a load (the LD1 single-lane case described by the adjacent comment),
    return VectorInsertExtractBaseCost - 1 for the NeoverseV1 / NeoverseV2
    processor families; every other family keeps VectorInsertExtractBaseCost + 1.
  * insert-extract.ll is split into extract-from-vector.ll (expectations
    unchanged) and insert-into-vector.ll, where the NEO prefix becomes NEO-N12
    (neoverse-n1/n2) and NEO-V12 (neoverse-v1/v2) so the LD1_* tests can expect
    4 and 2 respectively.
  * NOTE(review): the guard only tests operand 1 for being a load; it does not
    itself check that `I` is an insertelement. Confirm that code outside this
    hunk already restricts it to the insert case.

Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2226,8 +2226,15 @@
   // register instruction. I.e., if this is an `insertelement` instruction,
   // and its second operand is a load, then we will generate a LD1, which
   // are expensive instructions.
-  if (I && dyn_cast<LoadInst>(I->getOperand(1)))
-    return ST->getVectorInsertExtractBaseCost() + 1;
+  if (I && isa<LoadInst>(I->getOperand(1))) {
+    switch (ST->getProcFamily()) {
+    case AArch64Subtarget::NeoverseV1:
+    case AArch64Subtarget::NeoverseV2:
+      return ST->getVectorInsertExtractBaseCost() - 1;
+    default:
+      return ST->getVectorInsertExtractBaseCost() + 1;
+    }
+  }
 
   // FIXME:
   // If the extract-element and insert-element instructions could be
Index: llvm/test/Analysis/CostModel/AArch64/extract-from-vector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/extract-from-vector.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=kryo | FileCheck %s --check-prefix=KRYO
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n1 | FileCheck %s --check-prefix=NEO
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n2 | FileCheck %s --check-prefix=NEO
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v1 | FileCheck %s --check-prefix=NEO
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v2 | FileCheck %s --check-prefix=NEO
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @vectorInstrCost() {
+; KRYO-LABEL: 'vectorInstrCost'
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = extractelement <8 x i8> undef, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = extractelement <8 x i8> undef, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t3 = extractelement <4 x i16> undef, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t4 = extractelement <4 x i16> undef, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t5 = extractelement <2 x i32> undef, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t6 = extractelement <2 x i32> undef, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t7 = extractelement <2 x i64> undef, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = extractelement <2 x i64> undef, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t9 = extractelement <4 x half> undef, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t10 = extractelement <4 x half> undef, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t11 = extractelement <2 x float> undef, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t12 = extractelement <2 x float> undef, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t13 = extractelement <2 x double> undef, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = extractelement <2 x double> undef, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NEO-LABEL: 'vectorInstrCost'
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t1 = extractelement <8 x i8> undef, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t2 = extractelement <8 x i8> undef, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t3 = extractelement <4 x i16> undef, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t4 = extractelement <4 x i16> undef, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t5 = extractelement <2 x i32> undef, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t6 = extractelement <2 x i32> undef, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t7 = extractelement <2 x i64> undef, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t8 = extractelement <2 x i64> undef, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t9 = extractelement <4 x half> undef, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t10 = extractelement <4 x half> undef, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t11 = extractelement <2 x float> undef, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t12 = extractelement <2 x float> undef, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t13 = extractelement <2 x double> undef, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t14 = extractelement <2 x double> undef, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %t1 = extractelement <8 x i8> undef, i32 0
+  %t2 = extractelement <8 x i8> undef, i32 1
+  %t3 = extractelement <4 x i16> undef, i32 0
+  %t4 = extractelement <4 x i16> undef, i32 1
+  %t5 = extractelement <2 x i32> undef, i32 0
+  %t6 = extractelement <2 x i32> undef, i32 1
+  %t7 = extractelement <2 x i64> undef, i32 0
+  %t8 = extractelement <2 x i64> undef, i32 1
+  %t9 = extractelement <4 x half> undef, i32 0
+  %t10 = extractelement <4 x half> undef, i32 1
+  %t11 = extractelement <2 x float> undef, i32 0
+  %t12 = extractelement <2 x float> undef, i32 1
+  %t13 = extractelement <2 x double> undef, i32 0
+  %t14 = extractelement <2 x double> undef, i32 1
+  ret void
+}
Index: llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
+++ /dev/null
@@ -1,174 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=kryo | FileCheck %s --check-prefix=KRYO
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n1 | FileCheck %s --check-prefix=NEO
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n2 | FileCheck %s --check-prefix=NEO
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v1 | FileCheck %s --check-prefix=NEO
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v2 | FileCheck %s --check-prefix=NEO
-
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64--linux-gnu"
-
-define void @vectorInstrCost() {
-; KRYO-LABEL: 'vectorInstrCost'
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = extractelement <8 x i8> undef, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = extractelement <8 x i8> undef, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t3 = extractelement <4 x i16> undef, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t4 = extractelement <4 x i16> undef, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t5 = extractelement <2 x i32> undef, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t6 = extractelement <2 x i32> undef, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t7 = extractelement <2 x i64> undef, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = extractelement <2 x i64> undef, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t9 = extractelement <4 x half> undef, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t10 = extractelement <4 x half> undef, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t11 = extractelement <2 x float> undef, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t12 = extractelement <2 x float> undef, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t13 = extractelement <2 x double> undef, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = extractelement <2 x double> undef, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t110 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; NEO-LABEL: 'vectorInstrCost'
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t1 = extractelement <8 x i8> undef, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t2 = extractelement <8 x i8> undef, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t3 = extractelement <4 x i16> undef, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t4 = extractelement <4 x i16> undef, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t5 = extractelement <2 x i32> undef, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t6 = extractelement <2 x i32> undef, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t7 = extractelement <2 x i64> undef, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t8 = extractelement <2 x i64> undef, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t9 = extractelement <4 x half> undef, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t10 = extractelement <4 x half> undef, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t11 = extractelement <2 x float> undef, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t12 = extractelement <2 x float> undef, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t13 = extractelement <2 x double> undef, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t14 = extractelement <2 x double> undef, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t110 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  %t1 = extractelement <8 x i8> undef, i32 0
-  %t2 = extractelement <8 x i8> undef, i32 1
-  %t3 = extractelement <4 x i16> undef, i32 0
-  %t4 = extractelement <4 x i16> undef, i32 1
-  %t5 = extractelement <2 x i32> undef, i32 0
-  %t6 = extractelement <2 x i32> undef, i32 1
-  %t7 = extractelement <2 x i64> undef, i32 0
-  %t8 = extractelement <2 x i64> undef, i32 1
-  %t9 = extractelement <4 x half> undef, i32 0
-  %t10 = extractelement <4 x half> undef, i32 1
-  %t11 = extractelement <2 x float> undef, i32 0
-  %t12 = extractelement <2 x float> undef, i32 1
-  %t13 = extractelement <2 x double> undef, i32 0
-  %t14 = extractelement <2 x double> undef, i32 1
-
-  %t30 = insertelement <8 x i8> undef, i8 0, i32 0
-  %t40 = insertelement <8 x i8> undef, i8 1, i32 1
-  %t50 = insertelement <4 x i16> undef, i16 2, i32 0
-  %t60 = insertelement <4 x i16> undef, i16 3, i32 1
-  %t70 = insertelement <2 x i32> undef, i32 4, i32 0
-  %t80 = insertelement <2 x i32> undef, i32 5, i32 1
-  %t90 = insertelement <2 x i64> undef, i64 6, i32 0
-  %t100 = insertelement <2 x i64> undef, i64 7, i32 1
-  %t110 = insertelement <4 x half> zeroinitializer, half 0.000000e+00, i64 0
-  %t120 = insertelement <4 x half> zeroinitializer, half 0.000000e+00, i64 1
-  %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
-  %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
-  %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
-  %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
-  ret void
-}
-
-;; LD1: Load one single-element structure to one lane of one register.
-
-define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) {
-; KRYO-LABEL: 'LD1_B'
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
-;
-; NEO-LABEL: 'LD1_B'
-; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
-;
-entry:
-  %v1 = load i8, ptr %i, align 1
-  %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
-  ret <8x i8> %v2
-}
-
-define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) {
-; KRYO-LABEL: 'LD1_H'
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
-;
-; NEO-LABEL: 'LD1_H'
-; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
-; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
-;
-entry:
-  %v1 = load i16, ptr %i, align 2
-  %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
-  ret <4 x i16> %v2
-}
-
-define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) {
-; KRYO-LABEL: 'LD1_W'
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
-;
-; NEO-LABEL: 'LD1_W'
-; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
-; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
-;
-entry:
-  %v1 = load i32, ptr %i, align 4
-  %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
-  ret <4 x i32> %v2
-}
-
-define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) {
-; KRYO-LABEL: 'LD1_X'
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
-;
-; NEO-LABEL: 'LD1_X'
-; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
-; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
-; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
-;
-entry:
-  %v1 = load i64, ptr %i, align 8
-  %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
-  ret <2 x i64> %v2
-}
Index: llvm/test/Analysis/CostModel/AArch64/insert-into-vector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/insert-into-vector.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=kryo | FileCheck %s --check-prefix=KRYO
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n1 | FileCheck %s --check-prefix=NEO-N12
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n2 | FileCheck %s --check-prefix=NEO-N12
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v1 | FileCheck %s --check-prefix=NEO-V12
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v2 | FileCheck %s --check-prefix=NEO-V12
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @vectorInstrCost() {
+; KRYO-LABEL: 'vectorInstrCost'
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t110 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NEO-N12-LABEL: 'vectorInstrCost'
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t110 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NEO-V12-LABEL: 'vectorInstrCost'
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t30 = insertelement <8 x i8> undef, i8 0, i32 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t40 = insertelement <8 x i8> undef, i8 1, i32 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t50 = insertelement <4 x i16> undef, i16 2, i32 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t60 = insertelement <4 x i16> undef, i16 3, i32 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t70 = insertelement <2 x i32> undef, i32 4, i32 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t80 = insertelement <2 x i32> undef, i32 5, i32 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t90 = insertelement <2 x i64> undef, i64 6, i32 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t100 = insertelement <2 x i64> undef, i64 7, i32 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t110 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t120 = insertelement <4 x half> zeroinitializer, half 0xH0000, i64 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %t30 = insertelement <8 x i8> undef, i8 0, i32 0
+  %t40 = insertelement <8 x i8> undef, i8 1, i32 1
+  %t50 = insertelement <4 x i16> undef, i16 2, i32 0
+  %t60 = insertelement <4 x i16> undef, i16 3, i32 1
+  %t70 = insertelement <2 x i32> undef, i32 4, i32 0
+  %t80 = insertelement <2 x i32> undef, i32 5, i32 1
+  %t90 = insertelement <2 x i64> undef, i64 6, i32 0
+  %t100 = insertelement <2 x i64> undef, i64 7, i32 1
+  %t110 = insertelement <4 x half> zeroinitializer, half 0.000000e+00, i64 0
+  %t120 = insertelement <4 x half> zeroinitializer, half 0.000000e+00, i64 1
+  %t130 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+  %t140 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 1
+  %t150 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 0
+  %t160 = insertelement <2 x double> zeroinitializer, double 0.000000e+00, i64 1
+  ret void
+}
+
+;; LD1: Load one single-element structure to one lane of one register.
+
+define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) {
+; KRYO-LABEL: 'LD1_B'
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
+;
+; NEO-N12-LABEL: 'LD1_B'
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
+;
+; NEO-V12-LABEL: 'LD1_B'
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
+;
+entry:
+  %v1 = load i8, ptr %i, align 1
+  %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+  ret <8x i8> %v2
+}
+
+define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) {
+; KRYO-LABEL: 'LD1_H'
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
+;
+; NEO-N12-LABEL: 'LD1_H'
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
+;
+; NEO-V12-LABEL: 'LD1_H'
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
+;
+entry:
+  %v1 = load i16, ptr %i, align 2
+  %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+  ret <4 x i16> %v2
+}
+
+define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) {
+; KRYO-LABEL: 'LD1_W'
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
+;
+; NEO-N12-LABEL: 'LD1_W'
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
+;
+; NEO-V12-LABEL: 'LD1_W'
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
+;
+entry:
+  %v1 = load i32, ptr %i, align 4
+  %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+  ret <4 x i32> %v2
+}
+
+define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) {
+; KRYO-LABEL: 'LD1_X'
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
+;
+; NEO-N12-LABEL: 'LD1_X'
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; NEO-N12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
+;
+; NEO-V12-LABEL: 'LD1_X'
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; NEO-V12-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
+;
+entry:
+  %v1 = load i64, ptr %i, align 8
+  %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+  ret <2 x i64> %v2
+}