diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2694,6 +2694,11 @@ static const TypeConversionCostTblEntry VectorSelectTbl[] = { + { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 }, + { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 }, + { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 }, + { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 }, + { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 }, { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 }, { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 }, diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll --- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll @@ -163,9 +163,9 @@ define void @reduce_fmin16() { ; CHECK-NOF16-LABEL: 'reduce_fmin16' ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef) @@ -197,9 +197,9 @@ define void @reduce_fmax16() { ; CHECK-NOF16-LABEL: 'reduce_fmax16' ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll --- a/llvm/test/Analysis/CostModel/AArch64/select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/select.ll @@ -18,6 +18,11 @@ ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'select' @@ -33,6 +38,11 @@ ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v1 = select i1 undef, i8 undef, i8 undef @@ -53,5 +63,11 @@ %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef + ; simd vector float + %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef + %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef + %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef + %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef + %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef ret void } diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -158,7 +158,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; COST-LABEL: v4f16_select_ogt ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp ogt <4 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ogt <4 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; @@ -176,7 +176,7 @@ define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-LABEL: v8f16_select_ogt ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp ogt <8 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ogt <8 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; @@ -242,7 +242,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; COST-LABEL: v4f16_select_oge ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp oge <4 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oge <4 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; @@ -260,7 +260,7 @@ define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-LABEL: v8f16_select_oge ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp oge <8 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oge <8 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; @@ -326,7 +326,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; COST-LABEL: v4f16_select_olt ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp olt <4 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp olt <4 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; @@ -344,7 +344,7 @@ define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-LABEL: v8f16_select_olt ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp olt <8 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp olt <8 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; @@ -410,7 +410,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; COST-LABEL: v4f16_select_ole ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp ole <4 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ole <4 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; @@ -428,7 +428,7 @@ define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-LABEL: v8f16_select_ole ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp ole <8 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ole <8 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; @@ -494,7 +494,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; COST-LABEL: v4f16_select_oeq ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp oeq <4 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oeq <4 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; @@ -512,7 +512,7 @@ define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-LABEL: v8f16_select_oeq ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp oeq <8 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp oeq <8 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; @@ -578,9 +578,9 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; COST-LABEL: v4f16_select_one ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp one <4 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <4 x half> %a, %b -; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; ; CODE-LABEL: v4f16_select_one ; CODE: bb.0 @@ -598,9 +598,9 @@ define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-LABEL: v8f16_select_one ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp one <8 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <8 x half> %a, %b -; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; ; CODE-LABEL: v8f16_select_one ; CODE: bb.0 @@ -618,7 +618,7 @@ define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float> %c) { ; COST-LABEL: v2f32_select_one ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <2 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c ; CODE-LABEL: v2f32_select_one ; CODE: bb.0 @@ -636,7 +636,7 @@ define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; COST-LABEL: v4f32_select_one ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <4 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c ; CODE-LABEL: v4f32_select_one ; CODE: bb.0 @@ -654,7 +654,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; COST-LABEL: v2f64_select_one ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <2 x double> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c ; ; CODE-LABEL: v2f64_select_one ; CODE: bb.0 @@ -672,7 +672,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; COST-LABEL: v4f16_select_une ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cmp.1 = fcmp une <4 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp une <4 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c ; @@ -690,7 +690,7 @@ define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-LABEL: v8f16_select_une ; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %cmp.1 = fcmp une <8 x half> %a, %b -; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c +; COST-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp une <8 x half> %a, %b ; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; @@ -756,7 +756,7 @@ define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float> %c) { ; COST-LABEL: v2f32_select_ord ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <2 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c ; ; CODE-LABEL: v2f32_select_ord ; CODE: bb.0 @@ -774,7 +774,7 @@ define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; COST-LABEL: v4f32_select_ord ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <4 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c ; CODE-LABEL: v4f32_select_ord ; CODE: bb.0 @@ -792,7 +792,7 @@ define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; COST-LABEL: v2f64_select_ord ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <2 x double> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c ; ; CODE-LABEL: v2f64_select_ord ; CODE: bb.0 diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -133,37 +133,50 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) { ; CHECK-LABEL: @loop2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000 -; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000 -; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000 -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP2]], [[B]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[C]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[B]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[SCEVGEP]], [[C]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[BOUND04:%.*]] = icmp ugt ptr [[UGLYGEP3]], [[B]] -; CHECK-NEXT: [[BOUND15:%.*]] = icmp ugt ptr [[UGLYGEP]], [[A]] +; CHECK-NEXT: [[BOUND04:%.*]] = icmp ugt ptr [[SCEVGEP3]], [[B]] +; CHECK-NEXT: [[BOUND15:%.*]] = icmp ugt ptr [[SCEVGEP]], [[A]] ; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[LOOP_BODY:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT10]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !alias.scope !7 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD7]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11 -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x float> , <4 x float> [[WIDE_LOAD8]] -; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP3]], [[TMP5]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 -; CHECK-NEXT: br i1 [[TMP6]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope !4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope !7 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD9]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> , <4 x float> [[WIDE_LOAD12]] +; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> , <4 x float> [[WIDE_LOAD13]] +; CHECK-NEXT: [[PREDPHI14:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: store <4 x float> [[PREDPHI14]], ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: loop.body: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV1]]