diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -120,6 +120,8 @@
   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                                unsigned Index);
 
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
 
   int getArithmeticInstrCost(
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -469,6 +469,18 @@
   return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
 }
 
+/// Return the cost of the control-flow instruction \p Opcode under \p CostKind.
+/// For every cost kind other than reciprocal throughput, a PHI costs 0 and any
+/// other opcode costs 1. For reciprocal throughput the cost is always 0.
+unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
+                                        TTI::TargetCostKind CostKind) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return Opcode == Instruction::PHI ? 0 : 1;
+  // Only TCK_RecipThroughput reaches this point. Branches are assumed to be
+  // predicted.
+  return 0;
+}
+
 int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                        unsigned Index) {
   assert(Val->isVectorTy() && "This must be a vector type");
diff --git a/llvm/test/Analysis/CostModel/AArch64/aggregates.ll b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll
--- a/llvm/test/Analysis/CostModel/AArch64/aggregates.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll
@@ -4,63 +4,119 @@
 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=code-size -analyze | FileCheck %s --check-prefixes=ALL,CODESIZE
 
 define i32 @extract_first_i32({i32, i32} %agg) {
-; ALL-LABEL: 'extract_first_i32'
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
+; THROUGHPUT-LABEL: 'extract_first_i32'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+;
+; LATENCY-LABEL: 'extract_first_i32'
+; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0
+; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
+;
+; CODESIZE-LABEL: 'extract_first_i32'
+; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0
+; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
 ;
   %r = extractvalue {i32, i32} %agg, 0
   ret i32 %r
 }
 
 define i32 @extract_second_i32({i32, i32} %agg) {
-; ALL-LABEL: 'extract_second_i32'
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
+; THROUGHPUT-LABEL: 'extract_second_i32'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
%r = extractvalue { i32, i32 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_second_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_second_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r ; %r = extractvalue {i32, i32} %agg, 1 ret i32 %r } define i32 @extract_i32({i32, i1} %agg) { -; ALL-LABEL: 'extract_i32' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; THROUGHPUT-LABEL: 'extract_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r ; %r = extractvalue {i32, i1} %agg, 0 ret i32 %r } define i1 @extract_i1({i32, i1} %agg) { -; ALL-LABEL: 'extract_i1' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; THROUGHPUT-LABEL: 'extract_i1' +; 
THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %r +; +; LATENCY-LABEL: 'extract_i1' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; +; CODESIZE-LABEL: 'extract_i1' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r ; %r = extractvalue {i32, i1} %agg, 1 ret i1 %r } define float @extract_float({i32, float} %agg) { -; ALL-LABEL: 'extract_float' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; THROUGHPUT-LABEL: 'extract_float' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; LATENCY-LABEL: 'extract_float' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; +; CODESIZE-LABEL: 'extract_float' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r ; %r = extractvalue {i32, float} %agg, 1 ret float %r } define [42 x i42] @extract_array({i32, [42 x i42]} %agg) { -; ALL-LABEL: 'extract_array' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 -; ALL-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; THROUGHPUT-LABEL: 'extract_array' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret [42 x i42] %r +; +; LATENCY-LABEL: 'extract_array' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; +; CODESIZE-LABEL: 'extract_array' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r ; %r = extractvalue {i32, [42 x i42]} %agg, 1 ret [42 x i42] %r } define <42 x i42> @extract_vector({i32, <42 x i42>} %agg) { -; ALL-LABEL: 'extract_vector' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; THROUGHPUT-LABEL: 'extract_vector' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <42 x i42> %r +; +; LATENCY-LABEL: 'extract_vector' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; +; CODESIZE-LABEL: 'extract_vector' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r ; %r = extractvalue {i32, <42 x 
i42>} %agg, 1 ret <42 x i42> %r @@ -69,9 +125,17 @@ %T1 = type { i32, float, <4 x i1> } define %T1 @extract_struct({i32, %T1} %agg) { -; ALL-LABEL: 'extract_struct' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 -; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; THROUGHPUT-LABEL: 'extract_struct' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %T1 %r +; +; LATENCY-LABEL: 'extract_struct' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; +; CODESIZE-LABEL: 'extract_struct' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r ; %r = extractvalue {i32, %T1} %agg, 1 ret %T1 %r diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll --- a/llvm/test/Analysis/CostModel/AArch64/cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll @@ -267,7 +267,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %r0 = sext i1 undef to i8 %r1 = zext i1 undef to i8 @@ -609,7 +609,7 @@ ; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: store i16 %r23, i16* undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r24, i32* undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r25, i64* undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r12 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r12 ; %r0 = sext i8 %a to i16 %r1 = sext i8 %a to i32 @@ -683,7 +683,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %a = bitcast i32 undef to i32 %b = bitcast float undef to float @@ -731,7 +731,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %loadi8 = load i8, i8* undef %loadi16 = load i16, i16* undef @@ -786,7 +786,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r4, i16* undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = trunc i16 undef to i8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r5, i8* undef -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %r0 = trunc i64 undef to i8 store i8 %r0, i8* undef diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll --- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll @@ -17,7 +17,7 @@ ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %a10 = fcmp olt <8 x half> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-SIZE-LABEL: 'cmps' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = icmp slt i8 undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll --- a/llvm/test/Analysis/CostModel/AArch64/select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/select.ll @@ -18,7 +18,7 @@ ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void ; ; CHECK-SIZE-LABEL: 'select' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> zeroinitializer %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> zeroinitializer diff --git a/llvm/test/Analysis/CostModel/AArch64/store.ll b/llvm/test/Analysis/CostModel/AArch64/store.ll --- a/llvm/test/Analysis/CostModel/AArch64/store.ll +++ b/llvm/test/Analysis/CostModel/AArch64/store.ll @@ -27,7 +27,7 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %1 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %2 = load <4 x i8>, <4 x i8>* undef, align 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 
'getMemoryOpCost' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4 @@ -69,7 +69,7 @@ ; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4 ; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %1 = load <2 x i8>, <2 x i8>* undef, align 2 ; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %2 = load <4 x i8>, <4 x i8>* undef, align 4 -; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; store <4 x i64> undef, <4 x i64> * undef store <8 x i32> undef, <8 x i32> * undef diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll @@ -7,12 +7,12 @@ ; This test checks that we correctly compute the scalarized operands for a ; user-specified vectorization factor when interleaving is disabled. We use the -; "optsize" attribute to disable all interleaving calculations. A cost of 5 +; "optsize" attribute to disable all interleaving calculations. A cost of 4 ; for %tmp4 indicates that we would scalarize it's operand (%tmp3), giving ; %tmp4 a lower scalarization overhead. 
; ; COST-LABEL: predicated_udiv_scalarized_operand -; COST: LV: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i64 %tmp2, %tmp3 +; COST: LV: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i64 %tmp2, %tmp3 ; ; CHECK-LABEL: @predicated_udiv_scalarized_operand( ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -8,7 +8,7 @@ ; Check scalar cost for extractvalue. The constant and loop invariant operands are free, ; leaving cost 3 for scalarizing the result + 2 for executing the op with VF 2. -; CM: LV: Scalar loop costs: 9. +; CM: LV: Scalar loop costs: 7. ; CM: LV: Found an estimated cost of 5 for VF 2 For instruction: %a = extractvalue { i64, i64 } %sv, 0 ; CM-NEXT: LV: Found an estimated cost of 5 for VF 2 For instruction: %b = extractvalue { i64, i64 } %sv, 1 @@ -57,7 +57,7 @@ ; Similar to the test case above, but checks getVectorCallCost as well. declare float @pow(float, float) readnone nounwind -; CM: LV: Scalar loop costs: 18. +; CM: LV: Scalar loop costs: 16. 
; CM: LV: Found an estimated cost of 5 for VF 2 For instruction: %a = extractvalue { float, float } %sv, 0 ; CM-NEXT: LV: Found an estimated cost of 5 for VF 2 For instruction: %b = extractvalue { float, float } %sv, 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll @@ -19,7 +19,7 @@ ; (udiv(2) + extractelement(6) + insertelement(3)) / 2 = 5 ; ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 -; CHECK: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 +; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 ; define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) { entry: @@ -101,7 +101,7 @@ ; CHECK: Scalarizing: %tmp3 = add nsw i32 %tmp2, %x ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp3 = add nsw i32 %tmp2, %x -; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 +; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 ; define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { entry: @@ -198,8 +198,8 @@ ; CHECK: Scalarizing: %tmp5 = sub i32 %tmp4, %x ; CHECK: Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4 ; CHECK: Found an estimated cost of 1 for VF 2 For instruction: %tmp2 = add i32 %tmp1, %x -; CHECK: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2 -; CHECK: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = udiv i32 %tmp3, %tmp2 +; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2 +; CHECK: Found an estimated cost of 5 for VF 2 For 
instruction: %tmp4 = udiv i32 %tmp3, %tmp2 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp5 = sub i32 %tmp4, %x ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, i32* %tmp0, align 4 ;