diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -876,6 +876,7 @@ // Two ops + 1 extract + 1 insert = 4. { ISD::MUL, MVT::v16i16, 4 }, { ISD::MUL, MVT::v8i32, 5 }, // BTVER2 from http://www.agner.org/ + { ISD::MUL, MVT::i64, 1 }, // SNB from http://www.agner.org/ { ISD::MUL, MVT::v4i64, 12 }, { ISD::SUB, MVT::v32i8, 4 }, diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -71,11 +71,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'smul' -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -90,11 +90,11 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'smul' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -109,11 +109,11 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'smul' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -128,11 +128,11 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'smul' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -147,11 +147,11 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'smul' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -204,11 +204,11 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'smul' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -305,11 +305,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'umul' -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -324,11 +324,11 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'umul' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -343,11 +343,11 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'umul' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -362,11 +362,11 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'umul' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -381,11 +381,11 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'umul' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) @@ -438,11 +438,11 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'umul' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1028,9 +1028,9 @@ ; ; AVX1-LABEL: 'smul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1047,9 +1047,9 @@ ; ; AVX2-LABEL: 'smul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1066,9 +1066,9 @@ ; ; AVX512F-LABEL: 'smul' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1085,9 +1085,9 @@ ; ; AVX512BW-LABEL: 'smul' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1104,9 +1104,9 @@ ; ; AVX512DQ-LABEL: 'smul' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1161,9 +1161,9 @@ ; ; BTVER2-LABEL: 'smul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1266,9 +1266,9 @@ ; ; AVX1-LABEL: 'umul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1285,9 +1285,9 @@ ; ; AVX2-LABEL: 'umul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1304,9 +1304,9 @@ ; ; AVX512F-LABEL: 'umul' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1323,9 +1323,9 @@ ; ; AVX512BW-LABEL: 'umul' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1342,9 +1342,9 @@ ; ; AVX512DQ-LABEL: 'umul' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1399,9 +1399,9 @@ ; ; BTVER2-LABEL: 'umul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll --- a/llvm/test/Analysis/CostModel/X86/arith.ll +++ b/llvm/test/Analysis/CostModel/X86/arith.ll @@ -974,7 +974,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = mul <8 x i64> undef, undef @@ -996,7 +996,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = mul <8 x i64> undef, undef @@ -1018,7 +1018,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = mul <8 x i64> undef, undef @@ -1040,7 +1040,7 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = mul <8 x i64> undef, undef @@ -1062,7 +1062,7 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'mul' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = mul <2 x i64> undef, undef ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = mul <4 x i64> undef, undef ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = mul <8 x i64> undef, undef @@ -1128,7 +1128,7 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'mul' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = mul <4 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = mul <8 x i64> undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll --- a/llvm/test/Analysis/CostModel/X86/mul.ll +++ b/llvm/test/Analysis/CostModel/X86/mul.ll @@ -422,7 +422,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_constnegpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, @@ -441,7 +441,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_constnegpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, @@ -460,7 +460,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_constnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, @@ -479,7 +479,7 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_constnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, @@ -598,7 +598,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = mul <8 x i64> undef, @@ -617,7 +617,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, @@ -636,7 +636,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, @@ -655,7 +655,7 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll --- a/llvm/test/Analysis/CostModel/X86/rem.ll +++ b/llvm/test/Analysis/CostModel/X86/rem.ll @@ -1,35 +1,73 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW -; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,GLM -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,BTVER2 define i32 @srem() { -; CHECK-LABEL: 'srem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'srem' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'srem' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'srem' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, undef %V2i64 = srem <2 x i64> undef, undef @@ -55,24 +93,62 @@ } define i32 @urem() { -; CHECK-LABEL: 'urem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'urem' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'urem' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'urem' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, undef %V2i64 = urem <2 x i64> undef, undef @@ -156,10 +232,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_const' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef, @@ -175,10 +251,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_const' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -194,10 +270,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_const' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -213,10 +289,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_const' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -270,10 +346,10 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_const' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef, @@ -332,10 +408,10 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_const' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef, @@ -351,10 +427,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_const' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -370,10 +446,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_const' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -389,10 +465,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_const' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -408,10 +484,10 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_const' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef, @@ -470,10 +546,10 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconst' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, @@ -489,10 +565,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconst' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -508,10 +584,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconst' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -527,10 +603,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconst' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -546,10 +622,10 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconst' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, @@ -608,10 +684,10 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_uniformconst' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, @@ -627,10 +703,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_uniformconst' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -646,10 +722,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconst' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -665,10 +741,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconst' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -684,10 +760,10 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_uniformconst' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, @@ -784,7 +860,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_constpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, @@ -803,7 +879,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_constpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8i64 = srem <8 x i64> undef, @@ -822,7 +898,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_constpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -841,7 +917,7 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_constpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -898,7 +974,7 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_constpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1079,7 +1155,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1098,7 +1174,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1117,7 +1193,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1136,7 +1212,7 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1193,7 +1269,7 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconstpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, @@ -1374,10 +1450,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_constnegpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1393,10 +1469,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_constnegpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1412,10 +1488,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_constnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1431,10 +1507,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_constnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1488,10 +1564,10 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_constnegpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1550,10 +1626,10 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_constnegpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1569,10 +1645,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_constnegpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1588,10 +1664,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_constnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1607,10 +1683,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_constnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1626,10 +1702,10 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_constnegpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1688,10 +1764,10 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstnegpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1707,10 +1783,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstnegpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1726,10 +1802,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1745,10 +1821,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1764,10 +1840,10 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconstnegpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1826,10 +1902,10 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_uniformconstnegpow2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1845,10 +1921,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_uniformconstnegpow2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1864,10 +1940,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconstnegpow2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1883,10 +1959,10 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconstnegpow2' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, @@ -1902,10 +1978,10 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_uniformconstnegpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll @@ -1037,16 +1037,83 @@ ; SLM-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 ; SLM-NEXT: ret void ; -; AVX-LABEL: @umul_v16i32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 -; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4 -; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP3]], i32 3) -; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP2]], <8 x i32> [[TMP4]], i32 3) -; AVX-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 -; AVX-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 -; AVX-NEXT: ret void +; AVX1-LABEL: @umul_v16i32( +; AVX1-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4 +; AVX1-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4 +; AVX1-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4 +; AVX1-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4 +; AVX1-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4 +; AVX1-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4 +; AVX1-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4 +; AVX1-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4 +; AVX1-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4 +; AVX1-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4 +; AVX1-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4 +; AVX1-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4 +; AVX1-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4 +; AVX1-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4 +; AVX1-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4 +; AVX1-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4 +; AVX1-NEXT: [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4 +; AVX1-NEXT: [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4 +; AVX1-NEXT: [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4 +; AVX1-NEXT: [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4 +; AVX1-NEXT: [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4 +; AVX1-NEXT: [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4 +; AVX1-NEXT: [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4 +; AVX1-NEXT: [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4 +; AVX1-NEXT: [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4 +; AVX1-NEXT: [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4 +; AVX1-NEXT: [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4 +; AVX1-NEXT: [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4 +; AVX1-NEXT: [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4 +; AVX1-NEXT: [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4 +; AVX1-NEXT: [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4 +; AVX1-NEXT: [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4 +; AVX1-NEXT: [[R0:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A0]], i32 [[B0]], i32 3) +; AVX1-NEXT: [[R1:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A1]], i32 [[B1]], i32 3) +; AVX1-NEXT: [[R2:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A2]], i32 [[B2]], i32 3) +; AVX1-NEXT: [[R3:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A3]], i32 [[B3]], i32 3) +; AVX1-NEXT: [[R4:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A4]], i32 [[B4]], i32 3) +; AVX1-NEXT: [[R5:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A5]], i32 [[B5]], i32 3) +; AVX1-NEXT: [[R6:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A6]], i32 [[B6]], i32 3) +; AVX1-NEXT: [[R7:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A7]], i32 [[B7]], i32 3) +; AVX1-NEXT: [[R8:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A8]], i32 [[B8]], i32 3) +; AVX1-NEXT: [[R9:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A9]], i32 [[B9]], i32 3) +; AVX1-NEXT: [[R10:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A10]], i32 [[B10]], i32 3) +; AVX1-NEXT: [[R11:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A11]], i32 [[B11]], i32 3) +; AVX1-NEXT: [[R12:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A12]], i32 [[B12]], i32 3) +; AVX1-NEXT: [[R13:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A13]], i32 [[B13]], i32 3) +; AVX1-NEXT: [[R14:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A14]], i32 [[B14]], i32 3) +; AVX1-NEXT: [[R15:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A15]], i32 [[B15]], i32 3) +; AVX1-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4 +; AVX1-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4 +; AVX1-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4 +; AVX1-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4 +; AVX1-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4 +; AVX1-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4 +; AVX1-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4 +; AVX1-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4 +; AVX1-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4 +; AVX1-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4 +; AVX1-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4 +; AVX1-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4 +; AVX1-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4 +; AVX1-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4 +; AVX1-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4 +; AVX1-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @umul_v16i32( +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP3]], i32 3) +; AVX2-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP2]], <8 x i32> [[TMP4]], i32 3) +; AVX2-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX2-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX2-NEXT: ret void ; ; AVX512-LABEL: @umul_v16i32( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4 @@ -1054,6 +1121,17 @@ ; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 3) ; AVX512-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4 ; AVX512-NEXT: ret void +; +; AVX256BW-LABEL: @umul_v16i32( +; AVX256BW-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4 +; AVX256BW-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX256BW-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4 +; AVX256BW-NEXT: [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX256BW-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP3]], i32 3) +; AVX256BW-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP2]], <8 x i32> [[TMP4]], i32 3) +; AVX256BW-NEXT: store <8 x i32> [[TMP5]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4 +; AVX256BW-NEXT: store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4 +; AVX256BW-NEXT: ret void ; %a0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4 %a1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll @@ -94,35 +94,40 @@ ; SLM-NEXT: ret void ; ; AVX128-LABEL: @mul_v8i64( -; AVX128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8 -; AVX128-NEXT: [[TMP9:%.*]] = mul <2 x i64> [[TMP1]], [[TMP5]] -; AVX128-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[TMP2]], [[TMP6]] -; AVX128-NEXT: [[TMP11:%.*]] = mul <2 x i64> [[TMP3]], [[TMP7]] -; AVX128-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[TMP4]], [[TMP8]] -; AVX128-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8 -; AVX128-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8 -; AVX128-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8 -; AVX128-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8 +; AVX128-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8 +; AVX128-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8 +; AVX128-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8 +; AVX128-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8 +; AVX128-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8 +; AVX128-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8 +; AVX128-NEXT: [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8 +; AVX128-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8 +; AVX128-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8 +; AVX128-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8 +; AVX128-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8 +; AVX128-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8 +; AVX128-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8 +; AVX128-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8 +; AVX128-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8 +; AVX128-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8 +; AVX128-NEXT: [[R0:%.*]] = mul i64 [[A0]], [[B0]] +; AVX128-NEXT: [[R1:%.*]] = mul i64 [[A1]], [[B1]] +; AVX128-NEXT: [[R2:%.*]] = mul i64 [[A2]], [[B2]] +; AVX128-NEXT: [[R3:%.*]] = mul i64 [[A3]], [[B3]] +; AVX128-NEXT: [[R4:%.*]] = mul i64 [[A4]], [[B4]] +; AVX128-NEXT: [[R5:%.*]] = mul i64 [[A5]], [[B5]] +; AVX128-NEXT: [[R6:%.*]] = mul i64 [[A6]], [[B6]] +; AVX128-NEXT: [[R7:%.*]] = mul i64 [[A7]], [[B7]] +; AVX128-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8 +; AVX128-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8 +; AVX128-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8 +; AVX128-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8 +; AVX128-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8 +; AVX128-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8 +; AVX128-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8 +; AVX128-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8 ; AVX128-NEXT: ret void ; -; AVX256-LABEL: @mul_v8i64( -; AVX256-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8 -; AVX256-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8 -; AVX256-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8 -; AVX256-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8 -; AVX256-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP1]], [[TMP3]] -; AVX256-NEXT: [[TMP6:%.*]] = mul <4 x i64> [[TMP2]], [[TMP4]] -; AVX256-NEXT: store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8 -; AVX256-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8 -; AVX256-NEXT: ret void -; ; AVX512-LABEL: @mul_v8i64( ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8 ; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 +; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 define void @powof2mul_uniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){ ; CHECK-LABEL: @powof2mul_uniform( @@ -267,28 +267,71 @@ ; SSE-NEXT: store i64 [[ADD7]], i64* [[GEP7]], align 8 ; SSE-NEXT: ret void ; -; AVX-LABEL: @PR51436( -; AVX-NEXT: entry: -; AVX-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1 -; AVX-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2 -; AVX-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 3 -; AVX-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 4 -; AVX-NEXT: [[GEP5:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 5 -; AVX-NEXT: [[GEP6:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 6 -; AVX-NEXT: [[GEP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 7 -; AVX-NEXT: [[TMP0:%.*]] = bitcast i64* [[A]] to <4 x i64>* -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8 -; AVX-NEXT: [[TMP2:%.*]] = bitcast i64* [[GEP4]] to <4 x i64>* -; AVX-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8 -; AVX-NEXT: [[TMP4:%.*]] = mul <4 x i64> [[TMP1]], -; AVX-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], -; AVX-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], -; AVX-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], -; AVX-NEXT: [[TMP8:%.*]] = bitcast i64* [[A]] to <4 x i64>* -; AVX-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* [[TMP8]], align 8 -; AVX-NEXT: [[TMP9:%.*]] = bitcast i64* [[GEP4]] to <4 x i64>* -; AVX-NEXT: store <4 x i64> [[TMP7]], <4 x i64>* [[TMP9]], align 8 -; AVX-NEXT: ret void +; AVX1-LABEL: @PR51436( +; AVX1-NEXT: entry: +; AVX1-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1 +; AVX1-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2 +; AVX1-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 3 +; AVX1-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 4 +; AVX1-NEXT: [[GEP5:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 5 +; AVX1-NEXT: [[GEP6:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 6 +; AVX1-NEXT: [[GEP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 7 +; AVX1-NEXT: [[LOAD0:%.*]] = load i64, i64* [[A]], align 8 +; AVX1-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8 +; AVX1-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8 +; AVX1-NEXT: [[LOAD3:%.*]] = load i64, i64* [[GEP3]], align 8 +; AVX1-NEXT: [[LOAD4:%.*]] = load i64, i64* [[GEP4]], align 8 +; AVX1-NEXT: [[LOAD5:%.*]] = load i64, i64* [[GEP5]], align 8 +; AVX1-NEXT: [[LOAD6:%.*]] = load i64, i64* [[GEP6]], align 8 +; AVX1-NEXT: [[LOAD7:%.*]] = load i64, i64* [[GEP7]], align 8 +; AVX1-NEXT: [[MUL0:%.*]] = mul i64 [[LOAD0]], -17592186044416 +; AVX1-NEXT: [[MUL1:%.*]] = mul i64 [[LOAD1]], -17592186044416 +; AVX1-NEXT: [[MUL2:%.*]] = mul i64 [[LOAD2]], -17592186044416 +; AVX1-NEXT: [[MUL3:%.*]] = mul i64 [[LOAD3]], -17592186044416 +; AVX1-NEXT: [[MUL4:%.*]] = mul i64 [[LOAD4]], -17592186044416 +; AVX1-NEXT: [[MUL5:%.*]] = mul i64 [[LOAD5]], -17592186044416 +; AVX1-NEXT: [[MUL6:%.*]] = mul i64 [[LOAD6]], -17592186044416 +; AVX1-NEXT: [[MUL7:%.*]] = mul i64 [[LOAD7]], -17592186044416 +; AVX1-NEXT: [[ADD0:%.*]] = add i64 [[MUL0]], -17592186044416 +; AVX1-NEXT: [[ADD1:%.*]] = add i64 [[MUL1]], -17592186044416 +; AVX1-NEXT: [[ADD2:%.*]] = add i64 [[MUL2]], -17592186044416 +; AVX1-NEXT: [[ADD3:%.*]] = add i64 [[MUL3]], -17592186044416 +; AVX1-NEXT: [[ADD4:%.*]] = add i64 [[MUL4]], -17592186044416 +; AVX1-NEXT: [[ADD5:%.*]] = add i64 [[MUL5]], -17592186044416 +; AVX1-NEXT: [[ADD6:%.*]] = add i64 [[MUL6]], -17592186044416 +; AVX1-NEXT: [[ADD7:%.*]] = add i64 [[MUL7]], -17592186044416 +; AVX1-NEXT: store i64 [[ADD0]], i64* [[A]], align 8 +; AVX1-NEXT: store i64 [[ADD1]], i64* [[GEP1]], align 8 +; AVX1-NEXT: store i64 [[ADD2]], i64* [[GEP2]], align 8 +; AVX1-NEXT: store i64 [[ADD3]], i64* [[GEP3]], align 8 +; AVX1-NEXT: store i64 [[ADD4]], i64* [[GEP4]], align 8 +; AVX1-NEXT: store i64 [[ADD5]], i64* [[GEP5]], align 8 +; AVX1-NEXT: store i64 [[ADD6]], i64* [[GEP6]], align 8 +; AVX1-NEXT: store i64 [[ADD7]], i64* [[GEP7]], align 8 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @PR51436( +; AVX2-NEXT: entry: +; AVX2-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1 +; AVX2-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2 +; AVX2-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 3 +; AVX2-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 4 +; AVX2-NEXT: [[GEP5:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 5 +; AVX2-NEXT: [[GEP6:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 6 +; AVX2-NEXT: [[GEP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 7 +; AVX2-NEXT: [[TMP0:%.*]] = bitcast i64* [[A]] to <4 x i64>* +; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8 +; AVX2-NEXT: [[TMP2:%.*]] = bitcast i64* [[GEP4]] to <4 x i64>* +; AVX2-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8 +; AVX2-NEXT: [[TMP4:%.*]] = mul <4 x i64> [[TMP1]], +; AVX2-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], +; AVX2-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], +; AVX2-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], +; AVX2-NEXT: [[TMP8:%.*]] = bitcast i64* [[A]] to <4 x i64>* +; AVX2-NEXT: store <4 x i64> [[TMP6]], <4 x i64>* [[TMP8]], align 8 +; AVX2-NEXT: [[TMP9:%.*]] = bitcast i64* [[GEP4]] to <4 x i64>* +; AVX2-NEXT: store <4 x i64> [[TMP7]], <4 x i64>* [[TMP9]], align 8 +; AVX2-NEXT: ret void ; entry: %gep1 = getelementptr inbounds i64, i64* %a, i64 1