Index: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h @@ -1444,7 +1444,8 @@ 0, Ty); ArithCost += (NumReduxLevels - LongVectorCount) * ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); - return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); + return ShuffleCost + ArithCost + + ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); } /// Try to calculate op costs for min/max reduction operations. @@ -1502,8 +1503,8 @@ // Need 3 extractelement instructions for scalarization + an additional // scalar select instruction. return ShuffleCost + MinMaxCost + - 3 * getScalarizationOverhead(Ty, /*Insert=*/false, - /*Extract=*/true) + + 3 * ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, + 0) + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, ScalarCondTy, nullptr); } Index: llvm/trunk/test/Analysis/CostModel/AArch64/vector-reduce.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/AArch64/vector-reduce.ll +++ llvm/trunk/test/Analysis/CostModel/AArch64/vector-reduce.ll @@ -47,7 +47,7 @@ } ; COST-LABEL: umin.i8.v8i8 -; COST: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v) ; CODE-LABEL: umin.i8.v8i8 ; CODE: uminv b0, v0.8b define i8 @umin.i8.v8i8(<8 x i8> %v) { @@ -56,7 +56,7 @@ } ; COST-LABEL: umin.i8.v16i8 -; COST: Found an estimated cost of 744 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 609 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v) ; CODE-LABEL: umin.i8.v16i8 ; CODE: uminv b0, v0.16b define i8 @umin.i8.v16i8(<16 x i8> %v) { @@ -65,7 +65,7 @@ } ; COST-LABEL: umin.i16.v4i16 -; COST: Found an estimated cost of 92 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 65 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v) ; CODE-LABEL: umin.i16.v4i16 ; CODE: uminv h0, v0.4h define i16 @umin.i16.v4i16(<4 x i16> %v) { @@ -74,7 +74,7 @@ } ; COST-LABEL: umin.i16.v8i16 -; COST: Found an estimated cost of 280 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v) ; CODE-LABEL: umin.i16.v8i16 ; CODE: uminv h0, v0.8h define i16 @umin.i16.v8i16(<8 x i16> %v) { @@ -83,7 +83,7 @@ } ; COST-LABEL: umin.i32.v4i32 -; COST: Found an estimated cost of 62 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 35 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v) ; CODE-LABEL: umin.i32.v4i32 ; CODE: uminv s0, v0.4s define i32 @umin.i32.v4i32(<4 x i32> %v) { @@ -92,7 +92,7 @@ } ; COST-LABEL: umax.i8.v8i8 -; COST: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v) ; CODE-LABEL: umax.i8.v8i8 ; CODE: umaxv b0, v0.8b define i8 @umax.i8.v8i8(<8 x i8> %v) { @@ -101,7 +101,7 @@ } ; COST-LABEL: umax.i8.v16i8 -; COST: Found an estimated cost of 744 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 609 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v) ; CODE-LABEL: umax.i8.v16i8 ; CODE: umaxv b0, v0.16b define i8 @umax.i8.v16i8(<16 x i8> %v) { @@ -110,7 +110,7 @@ } ; COST-LABEL: umax.i16.v4i16 -; COST: Found an estimated cost of 92 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 65 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v) ; CODE-LABEL: umax.i16.v4i16 ; CODE: umaxv h0, v0.4h define i16 @umax.i16.v4i16(<4 x i16> %v) { @@ -119,7 +119,7 @@ } ; COST-LABEL: umax.i16.v8i16 -; COST: Found an estimated cost of 280 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v) ; CODE-LABEL: umax.i16.v8i16 ; CODE: umaxv h0, v0.8h define i16 @umax.i16.v8i16(<8 x i16> %v) { @@ -128,7 +128,7 @@ } ; COST-LABEL: umax.i32.v4i32 -; COST: Found an estimated cost of 62 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 35 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v) ; CODE-LABEL: umax.i32.v4i32 ; CODE: umaxv s0, v0.4s define i32 @umax.i32.v4i32(<4 x i32> %v) { @@ -137,7 +137,7 @@ } ; COST-LABEL: smin.i8.v8i8 -; COST: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v) ; CODE-LABEL: smin.i8.v8i8 ; CODE: sminv b0, v0.8b define i8 @smin.i8.v8i8(<8 x i8> %v) { @@ -146,7 +146,7 @@ } ; COST-LABEL: smin.i8.v16i8 -; COST: Found an estimated cost of 744 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 609 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v) ; CODE-LABEL: smin.i8.v16i8 ; CODE: sminv b0, v0.16b define i8 @smin.i8.v16i8(<16 x i8> %v) { @@ -155,7 +155,7 @@ } ; COST-LABEL: smin.i16.v4i16 -; COST: Found an estimated cost of 92 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 65 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v) ; CODE-LABEL: smin.i16.v4i16 ; CODE: sminv h0, v0.4h define i16 @smin.i16.v4i16(<4 x i16> %v) { @@ -164,7 +164,7 @@ } ; COST-LABEL: smin.i16.v8i16 -; COST: Found an estimated cost of 280 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v) ; CODE-LABEL: smin.i16.v8i16 ; CODE: sminv h0, v0.8h define i16 @smin.i16.v8i16(<8 x i16> %v) { @@ -173,7 +173,7 @@ } ; COST-LABEL: smin.i32.v4i32 -; COST: Found an estimated cost of 62 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 35 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v) ; CODE-LABEL: smin.i32.v4i32 ; CODE: sminv s0, v0.4s define i32 @smin.i32.v4i32(<4 x i32> %v) { @@ -182,7 +182,7 @@ } ; COST-LABEL: smax.i8.v8i8 -; COST: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v) ; CODE-LABEL: smax.i8.v8i8 ; CODE: smaxv b0, v0.8b define i8 @smax.i8.v8i8(<8 x i8> %v) { @@ -191,7 +191,7 @@ } ; COST-LABEL: smax.i8.v16i8 -; COST: Found an estimated cost of 744 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v) +; COST: Found an estimated cost of 609 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v) ; CODE-LABEL: smax.i8.v16i8 ; CODE: smaxv b0, v0.16b define i8 @smax.i8.v16i8(<16 x i8> %v) { @@ -200,7 +200,7 @@ } ; COST-LABEL: smax.i16.v4i16 -; COST: Found an estimated cost of 92 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v) +; COST: Found an estimated cost of 65 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v) ; CODE-LABEL: smax.i16.v4i16 ; CODE: smaxv h0, v0.4h define i16 @smax.i16.v4i16(<4 x i16> %v) { @@ -209,7 +209,7 @@ } ; COST-LABEL: smax.i16.v8i16 -; COST: Found an estimated cost of 280 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v) +; COST: Found an estimated cost of 217 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v) ; CODE-LABEL: smax.i16.v8i16 ; CODE: smaxv h0, v0.8h define i16 @smax.i16.v8i16(<8 x i16> %v) { @@ -218,7 +218,7 @@ } ; COST-LABEL: smax.i32.v4i32 -; COST: Found an estimated cost of 62 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v) +; COST: Found an estimated cost of 35 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v) ; CODE-LABEL: smax.i32.v4i32 ; CODE: smaxv s0, v0.4s define i32 @smax.i32.v4i32(<4 x i32> %v) { @@ -227,7 +227,7 @@ } ; COST-LABEL: fmin.f32.v4f32 -; COST: Found an estimated cost of 62 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v) +; COST: Found an estimated cost of 35 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v) ; CODE-LABEL: fmin.f32.v4f32 ; CODE: fminnmv s0, v0.4s define float @fmin.f32.v4f32(<4 x float> %v) { @@ -236,7 +236,7 @@ } ; COST-LABEL: fmax.f32.v4f32 -; COST: Found an estimated cost of 62 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v) +; COST: Found an estimated cost of 35 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v) ; CODE-LABEL: fmax.f32.v4f32 ; CODE: fmaxnmv s0, v0.4s define float @fmax.f32.v4f32(<4 x float> %v) { Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-add-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-add-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-add-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -45,8 +45,8 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) @@ -59,19 +59,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -94,8 +94,8 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) @@ -108,19 +108,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -134,41 +134,41 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) @@ -181,67 +181,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-add.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -45,8 +45,8 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef) @@ -59,19 +59,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -94,8 +94,8 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef) @@ -108,19 +108,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -134,41 +134,41 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef) @@ -181,67 +181,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-and-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-and-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-and-widen.ll @@ -11,34 +11,34 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) @@ -51,35 +51,35 @@ define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) @@ -92,67 +92,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) @@ -165,67 +165,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) @@ -239,90 +239,90 @@ define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-and.ll @@ -11,34 +11,34 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef) @@ -51,35 +51,35 @@ define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef) @@ -92,67 +92,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef) @@ -165,67 +165,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef) @@ -239,90 +239,90 @@ define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-mul-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-mul-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-mul-widen.ll @@ -11,50 +11,50 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) @@ -67,51 +67,51 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) @@ -124,67 +124,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) @@ -197,67 +197,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 161 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 254 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 358 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 223 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 327 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 239 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-mul.ll @@ -11,50 +11,50 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef) @@ -67,67 +67,67 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i32' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i32' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i32' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef) @@ -140,67 +140,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef) @@ -213,67 +213,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 161 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 254 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 358 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 223 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 327 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 239 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-or-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-or-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-or-widen.ll @@ -11,34 +11,34 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) @@ -51,35 +51,35 @@ define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) @@ -92,67 +92,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) @@ -165,67 +165,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) @@ -239,90 +239,90 @@ define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-or.ll @@ -11,34 +11,34 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef) @@ -51,35 +51,35 @@ define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef) @@ -92,67 +92,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef) @@ -165,67 +165,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef) @@ -239,90 +239,90 @@ define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-smax-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smax-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smax-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,64 +197,64 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smax.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,40 +197,40 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) @@ -238,7 +238,7 @@ ; ; AVX512F-LABEL: 'reduce_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) @@ -246,15 +246,15 @@ ; ; AVX512BW-LABEL: 'reduce_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-smin-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smin-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smin-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,64 +197,64 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-smin.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,40 +197,40 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) @@ -238,7 +238,7 @@ ; ; AVX512F-LABEL: 'reduce_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) @@ -246,15 +246,15 @@ ; ; AVX512BW-LABEL: 'reduce_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-umax-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umax-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umax-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,64 +197,64 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umax.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,40 +197,40 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) @@ -238,7 +238,7 @@ ; ; AVX512F-LABEL: 'reduce_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) @@ -246,15 +246,15 @@ ; ; AVX512BW-LABEL: 'reduce_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-umin-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umin-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umin-widen.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,64 +197,64 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-umin.ll @@ -11,18 +11,18 @@ define i32 @reduce_i64(i32 %arg) { ; SSE2-LABEL: 'reduce_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i64' @@ -36,9 +36,9 @@ ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' @@ -67,19 +67,19 @@ define i32 @reduce_i32(i32 %arg) { ; SSE2-LABEL: 'reduce_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i32' @@ -124,19 +124,19 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' @@ -150,9 +150,9 @@ ; AVX1-LABEL: 'reduce_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' @@ -175,8 +175,8 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' @@ -197,40 +197,40 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) @@ -238,7 +238,7 @@ ; ; AVX512F-LABEL: 'reduce_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) @@ -246,15 +246,15 @@ ; ; AVX512BW-LABEL: 'reduce_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 253 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-xor-widen.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-xor-widen.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-xor-widen.ll @@ -11,34 +11,34 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) @@ -51,35 +51,35 @@ define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) @@ -92,67 +92,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) @@ -165,67 +165,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) @@ -239,90 +239,90 @@ define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduce-xor.ll @@ -11,34 +11,34 @@ define i32 @reduce_i64(i32 %arg) { ; SSE-LABEL: 'reduce_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef) @@ -51,35 +51,35 @@ define i32 @reduce_i32(i32 %arg) { ; SSE-LABEL: 'reduce_i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'reduce_i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef) @@ -92,67 +92,67 @@ define i32 @reduce_i16(i32 %arg) { ; SSE2-LABEL: 'reduce_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i16' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef) @@ -165,67 +165,67 @@ define i32 @reduce_i8(i32 %arg) { ; SSE2-LABEL: 'reduce_i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i8' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef) @@ -239,90 +239,90 @@ define i32 @reduce_i1(i32 %arg) { ; SSE2-LABEL: 'reduce_i1' ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'reduce_i1' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'reduce_i1' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'reduce_i1' ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'reduce_i1' ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'reduce_i1' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'reduce_i1' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 357 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 838 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 777 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'reduce_i1' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef) Index: llvm/trunk/test/Analysis/CostModel/X86/reduction.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/reduction.ll +++ llvm/trunk/test/Analysis/CostModel/X86/reduction.ll @@ -15,7 +15,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'reduction_cost_float' @@ -23,7 +23,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'reduction_cost_float' @@ -59,7 +59,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSSE3-LABEL: 'reduction_cost_int' @@ -69,7 +69,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSE42-LABEL: 'reduction_cost_int' @@ -127,7 +127,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -138,7 +138,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -188,7 +188,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -199,7 +199,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -248,7 +248,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -258,7 +258,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; @@ -300,13 +300,13 @@ ; SSE2-LABEL: 'no_pairwise_reduction2double' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'no_pairwise_reduction2double' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'no_pairwise_reduction2double' @@ -334,7 +334,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4float' @@ -342,7 +342,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'no_pairwise_reduction4float' @@ -376,7 +376,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4double' @@ -384,7 +384,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'no_pairwise_reduction4double' @@ -428,7 +428,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'no_pairwise_reduction8float' @@ -438,7 +438,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'no_pairwise_reduction8float' @@ -486,13 +486,13 @@ ; SSE2-LABEL: 'no_pairwise_reduction2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSSE3-LABEL: 'no_pairwise_reduction2i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSE42-LABEL: 'no_pairwise_reduction2i64' @@ -520,7 +520,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4i32' @@ -528,7 +528,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSE42-LABEL: 'no_pairwise_reduction4i32' @@ -562,7 +562,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4i64' @@ -570,7 +570,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSE42-LABEL: 'no_pairwise_reduction4i64' @@ -614,7 +614,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSSE3-LABEL: 'no_pairwise_reduction8i16' @@ -624,7 +624,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSE42-LABEL: 'no_pairwise_reduction8i16' @@ -666,7 +666,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSSE3-LABEL: 'no_pairwise_reduction8i32' @@ -676,7 +676,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSE42-LABEL: 'no_pairwise_reduction8i32' @@ -725,14 +725,14 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'pairwise_reduction2double' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'pairwise_reduction2double' @@ -765,7 +765,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'pairwise_reduction4float' @@ -775,7 +775,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'pairwise_reduction4float' @@ -817,7 +817,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'pairwise_reduction4double' @@ -827,7 +827,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'pairwise_reduction4double' @@ -882,7 +882,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'pairwise_reduction8float' @@ -895,7 +895,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'pairwise_reduction8float' @@ -956,14 +956,14 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSSE3-LABEL: 'pairwise_reduction2i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSE42-LABEL: 'pairwise_reduction2i64' @@ -996,7 +996,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSSE3-LABEL: 'pairwise_reduction4i32' @@ -1006,7 +1006,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSE42-LABEL: 'pairwise_reduction4i32' @@ -1048,7 +1048,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSSE3-LABEL: 'pairwise_reduction4i64' @@ -1058,7 +1058,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SSE42-LABEL: 'pairwise_reduction4i64' @@ -1113,7 +1113,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSSE3-LABEL: 'pairwise_reduction8i16' @@ -1126,7 +1126,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSE42-LABEL: 'pairwise_reduction8i16' @@ -1180,7 +1180,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSSE3-LABEL: 'pairwise_reduction8i32' @@ -1193,7 +1193,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SSE42-LABEL: 'pairwise_reduction8i32' Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -7,30 +7,27 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LOCAL_4_39_US:%.*]] = phi i32 [ [[VAL_42:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[LOCAL_8_43_US:%.*]] = phi i32 [ [[VAL_43:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[VAL_0:%.*]] = add i32 [[LOCAL_4_39_US]], 0 -; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[LOCAL_8_43_US]], [[VAL_0]] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef ; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] ; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] ; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]] ; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]] -; CHECK-NEXT: [[VAL_6:%.*]] = add i32 [[LOCAL_8_43_US]], 55 -; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], [[VAL_6]] +; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef ; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]] ; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]] ; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]] -; CHECK-NEXT: [[VAL_11:%.*]] = add i32 [[LOCAL_8_43_US]], 285 -; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], [[VAL_11]] +; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef ; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]] ; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]] ; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]] ; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]] ; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]] -; CHECK-NEXT: [[VAL_18:%.*]] = add i32 [[LOCAL_8_43_US]], 1240 -; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], [[VAL_18]] -; CHECK-NEXT: [[VAL_20:%.*]] = add i32 [[LOCAL_8_43_US]], 1496 -; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]] +; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef +; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef ; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]] ; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]] ; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]] @@ -43,16 +40,55 @@ ; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]] ; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]] ; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]] -; CHECK-NEXT: [[VAL_34:%.*]] = add i32 [[LOCAL_8_43_US]], 8555 -; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]] +; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef ; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]] ; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]] ; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]] -; CHECK-NEXT: [[VAL_39:%.*]] = add i32 [[LOCAL_8_43_US]], 12529 -; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], [[VAL_39]] -; CHECK-NEXT: [[VAL_41:%.*]] = add i32 [[LOCAL_8_43_US]], 13685 -; CHECK-NEXT: [[VAL_42]] = and i32 [[VAL_40]], [[VAL_41]] -; CHECK-NEXT: [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910 +; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]] +; CHECK-NEXT: [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0 ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -10,114 +10,33 @@ @var = global i32 zeroinitializer, align 8 define i32 @maxi8(i32) { -; SSE-LABEL: @maxi8( -; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 -; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 -; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] -; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] -; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] -; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 -; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] -; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] -; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] -; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] -; SSE-NEXT: ret i32 [[TMP23]] -; -; AVX-LABEL: @maxi8( -; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; AVX-NEXT: ret i32 [[TMP16]] -; -; AVX2-LABEL: @maxi8( -; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; AVX2-NEXT: ret i32 [[TMP16]] -; -; SKX-LABEL: @maxi8( -; SKX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; SKX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; SKX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; SKX-NEXT: ret i32 [[TMP16]] +; CHECK-LABEL: @maxi8( +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; CHECK-NEXT: ret i32 [[TMP16]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll @@ -12,8 +12,8 @@ ; Vector cost is 5, Scalar cost is 7 ; AVX: Adding cost -2 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction) -; Vector cost is 11, Scalar cost is 7 -; SSE: Adding cost 3 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction) +; Vector cost is 7, Scalar cost is 7 +; SSE: Adding cost 0 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction) define i32 @test_add(i32* nocapture readonly %p) { ; CHECK-LABEL: @test_add( ; CHECK-NEXT: entry: @@ -136,58 +136,32 @@ ; } define i32 @test_and(i32* nocapture readonly %p) { -; AVX-LABEL: @test_and( -; AVX-NEXT: entry: -; AVX-NEXT: [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4 -; AVX-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 -; AVX-NEXT: [[MUL_18:%.*]] = and i32 [[TMP1]], [[TMP0]] -; AVX-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 -; AVX-NEXT: [[MUL_29:%.*]] = and i32 [[TMP2]], [[MUL_18]] -; AVX-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 -; AVX-NEXT: [[MUL_310:%.*]] = and i32 [[TMP3]], [[MUL_29]] -; AVX-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; AVX-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 -; AVX-NEXT: [[MUL_411:%.*]] = and i32 [[TMP4]], [[MUL_310]] -; AVX-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; AVX-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 -; AVX-NEXT: [[MUL_512:%.*]] = and i32 [[TMP5]], [[MUL_411]] -; AVX-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; AVX-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 -; AVX-NEXT: [[MUL_613:%.*]] = and i32 [[TMP6]], [[MUL_512]] -; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 -; AVX-NEXT: [[MUL_714:%.*]] = and i32 [[TMP7]], [[MUL_613]] -; AVX-NEXT: ret i32 [[MUL_714]] -; -; SSE-LABEL: @test_and( -; SSE-NEXT: entry: -; SSE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; SSE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; SSE-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; SSE-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; SSE-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; SSE-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; SSE-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; SSE-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* -; SSE-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; SSE-NEXT: [[MUL_18:%.*]] = and i32 undef, undef -; SSE-NEXT: [[MUL_29:%.*]] = and i32 undef, [[MUL_18]] -; SSE-NEXT: [[MUL_310:%.*]] = and i32 undef, [[MUL_29]] -; SSE-NEXT: [[MUL_411:%.*]] = and i32 undef, [[MUL_310]] -; SSE-NEXT: [[MUL_512:%.*]] = and i32 undef, [[MUL_411]] -; SSE-NEXT: [[MUL_613:%.*]] = and i32 undef, [[MUL_512]] -; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]] -; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; SSE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; SSE-NEXT: [[MUL_714:%.*]] = and i32 undef, [[MUL_613]] -; SSE-NEXT: ret i32 [[TMP2]] +; CHECK-LABEL: @test_and( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[MUL_18:%.*]] = and i32 undef, undef +; CHECK-NEXT: [[MUL_29:%.*]] = and i32 undef, [[MUL_18]] +; CHECK-NEXT: [[MUL_310:%.*]] = and i32 undef, [[MUL_29]] +; CHECK-NEXT: [[MUL_411:%.*]] = and i32 undef, [[MUL_310]] +; CHECK-NEXT: [[MUL_512:%.*]] = and i32 undef, [[MUL_411]] +; CHECK-NEXT: [[MUL_613:%.*]] = and i32 undef, [[MUL_512]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[MUL_714:%.*]] = and i32 undef, [[MUL_613]] +; CHECK-NEXT: ret i32 [[TMP2]] ; entry: %0 = load i32, i32* %p, align 4 @@ -223,58 +197,32 @@ ; } define i32 @test_or(i32* nocapture readonly %p) { -; AVX-LABEL: @test_or( -; AVX-NEXT: entry: -; AVX-NEXT: [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4 -; AVX-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 -; AVX-NEXT: [[MUL_18:%.*]] = or i32 [[TMP1]], [[TMP0]] -; AVX-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 -; AVX-NEXT: [[MUL_29:%.*]] = or i32 [[TMP2]], [[MUL_18]] -; AVX-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 -; AVX-NEXT: [[MUL_310:%.*]] = or i32 [[TMP3]], [[MUL_29]] -; AVX-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; AVX-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 -; AVX-NEXT: [[MUL_411:%.*]] = or i32 [[TMP4]], [[MUL_310]] -; AVX-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; AVX-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 -; AVX-NEXT: [[MUL_512:%.*]] = or i32 [[TMP5]], [[MUL_411]] -; AVX-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; AVX-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 -; AVX-NEXT: [[MUL_613:%.*]] = or i32 [[TMP6]], [[MUL_512]] -; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 -; AVX-NEXT: [[MUL_714:%.*]] = or i32 [[TMP7]], [[MUL_613]] -; AVX-NEXT: ret i32 [[MUL_714]] -; -; SSE-LABEL: @test_or( -; SSE-NEXT: entry: -; SSE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; SSE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; SSE-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; SSE-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; SSE-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; SSE-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; SSE-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; SSE-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* -; SSE-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; SSE-NEXT: [[MUL_18:%.*]] = or i32 undef, undef -; SSE-NEXT: [[MUL_29:%.*]] = or i32 undef, [[MUL_18]] -; SSE-NEXT: [[MUL_310:%.*]] = or i32 undef, [[MUL_29]] -; SSE-NEXT: [[MUL_411:%.*]] = or i32 undef, [[MUL_310]] -; SSE-NEXT: [[MUL_512:%.*]] = or i32 undef, [[MUL_411]] -; SSE-NEXT: [[MUL_613:%.*]] = or i32 undef, [[MUL_512]] -; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]] -; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX2:%.*]] = or <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; SSE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; SSE-NEXT: [[MUL_714:%.*]] = or i32 undef, [[MUL_613]] -; SSE-NEXT: ret i32 [[TMP2]] +; CHECK-LABEL: @test_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[MUL_18:%.*]] = or i32 undef, undef +; CHECK-NEXT: [[MUL_29:%.*]] = or i32 undef, [[MUL_18]] +; CHECK-NEXT: [[MUL_310:%.*]] = or i32 undef, [[MUL_29]] +; CHECK-NEXT: [[MUL_411:%.*]] = or i32 undef, [[MUL_310]] +; CHECK-NEXT: [[MUL_512:%.*]] = or i32 undef, [[MUL_411]] +; CHECK-NEXT: [[MUL_613:%.*]] = or i32 undef, [[MUL_512]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[MUL_714:%.*]] = or i32 undef, [[MUL_613]] +; CHECK-NEXT: ret i32 [[TMP2]] ; entry: %0 = load i32, i32* %p, align 4 @@ -310,58 +258,32 @@ ; } define i32 @test_xor(i32* nocapture readonly %p) { -; AVX-LABEL: @test_xor( -; AVX-NEXT: entry: -; AVX-NEXT: [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4 -; AVX-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; AVX-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 -; AVX-NEXT: [[MUL_18:%.*]] = xor i32 [[TMP1]], [[TMP0]] -; AVX-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 -; AVX-NEXT: [[MUL_29:%.*]] = xor i32 [[TMP2]], [[MUL_18]] -; AVX-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 -; AVX-NEXT: [[MUL_310:%.*]] = xor i32 [[TMP3]], [[MUL_29]] -; AVX-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; AVX-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 -; AVX-NEXT: [[MUL_411:%.*]] = xor i32 [[TMP4]], [[MUL_310]] -; AVX-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; AVX-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 -; AVX-NEXT: [[MUL_512:%.*]] = xor i32 [[TMP5]], [[MUL_411]] -; AVX-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; AVX-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 -; AVX-NEXT: [[MUL_613:%.*]] = xor i32 [[TMP6]], [[MUL_512]] -; AVX-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 -; AVX-NEXT: [[MUL_714:%.*]] = xor i32 [[TMP7]], [[MUL_613]] -; AVX-NEXT: ret i32 [[MUL_714]] -; -; SSE-LABEL: @test_xor( -; SSE-NEXT: entry: -; SSE-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 -; SSE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 -; SSE-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 -; SSE-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 -; SSE-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 -; SSE-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 -; SSE-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 -; SSE-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* -; SSE-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; SSE-NEXT: [[MUL_18:%.*]] = xor i32 undef, undef -; SSE-NEXT: [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]] -; SSE-NEXT: [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]] -; SSE-NEXT: [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]] -; SSE-NEXT: [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]] -; SSE-NEXT: [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]] -; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]] -; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX2:%.*]] = xor <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] -; SSE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> -; SSE-NEXT: [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; SSE-NEXT: [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]] -; SSE-NEXT: ret i32 [[TMP2]] +; CHECK-LABEL: @test_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[MUL_18:%.*]] = xor i32 undef, undef +; CHECK-NEXT: [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]] +; CHECK-NEXT: [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]] +; CHECK-NEXT: [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]] +; CHECK-NEXT: [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]] +; CHECK-NEXT: [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = xor <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]] +; CHECK-NEXT: ret i32 [[TMP2]] ; entry: %0 = load i32, i32* %p, align 4 @@ -400,14 +322,15 @@ ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = xor i32 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = xor i32 [[TMP11]], [[TMP12]] -; CHECK-NEXT: ret i32 [[TMP13]] +; CHECK-NEXT: [[TMP7:%.*]] = xor i32 undef, undef +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], undef +; CHECK-NEXT: ret i32 [[TMP9]] ; entry: %0 = load <4 x i32>, <4 x i32>* %self, align 16 Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/remark_horcost.ll @@ -56,19 +56,28 @@ %add52 = add nsw i32 %add38, %add45 ; CHECK: add nsw <{{[0-9]+}} x i32> - ; CHECK-NOT: add nsw <{{[0-9]+}} x i32> + ; CHECK: add nsw <{{[0-9]+}} x i32> - ; YAML: --- !Missed + ; YAML: --- !Passed ; YAML-NEXT: Pass: slp-vectorizer - ; YAML-NEXT: Name: HorSLPNotBeneficial + ; YAML-NEXT: Name: StoresVectorized ; YAML-NEXT: Function: foo ; YAML-NEXT: Args: - ; YAML-NEXT: - String: Vectorizing horizontal reduction is possible - ; YAML-NEXT: - String: 'but not beneficial with cost ' - ; YAML-NEXT: - Cost: '1' - ; YAML-NEXT: - String: ' and threshold ' - ; YAML-NEXT: - Threshold: '0' + ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' + ; YAML-NEXT: - Cost: '-8' + ; YAML-NEXT: - String: ' and with tree size ' + ; YAML-NEXT: - TreeSize: '4' + ; YAML: --- !Passed + ; YAML-NEXT: Pass: slp-vectorizer + ; YAML-NEXT: Name: VectorizedHorizontalReduction + ; YAML-NEXT: Function: foo + ; YAML-NEXT: Args: + ; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost ' + ; YAML-NEXT: - Cost: '-2' + ; YAML-NEXT: - String: ' and with tree size ' + ; YAML-NEXT: - TreeSize: '1' + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 8 br i1 %exitcond, label %for.end, label %for.body