diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-add-sub.ll b/llvm/test/Analysis/CostModel/X86/shuffle-add-sub.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-add-sub.ll @@ -0,0 +1,1401 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=AVX512 + +; This test checks that the cost of an fadd-fsub-shuffle pattern is detected. 
+; These patterns correspond to these x86 instructions: +; ADDSUBPS 4xf32 SSE3 +; VADDSUBPS 4xf32 AVX +; VADDSUBPS 8xf32 AVX2 +; ADDSUBPD 2xf64 SSE3 +; VADDSUBPD 2xf64 AVX +; VADDSUBPD 4xf64 AVX2 + +; NOTE: This is the type pattern that can be detected by TTI: +; %add = fadd <2 x double> undef, undef +; %sub = fsub <2 x double> undef, undef +; %addsub = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> + +define void @shuffle_addsub() { +; SSE-LABEL: 'shuffle_addsub' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %sub_2xi8, <2 x i8> %add_2xi8, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %sub_4xi8, <4 x i8> %add_4xi8, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %sub_8xi8, <8 x i8> %add_8xi8, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %sub_16xi8, <16 x i8> %add_16xi8, <16 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %sub_32xi8, <32 x i8> %add_32xi8, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %sub_64xi8, <64 x i8> %add_64xi8, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %sub_2xi16, <2 x i16> %add_2xi16, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %sub_4xi16, <4 x i16> %add_4xi16, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %sub_8xi16, <8 x i16> %add_8xi16, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %sub_16xi16, <16 x i16> %add_16xi16, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %sub_32xi16, <32 x i16> %add_32xi16, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %sub_2xi32, <2 x i32> %add_2xi32, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %sub_4xi32, <4 x i32> %add_4xi32, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %sub_8xi32, <8 x i32> %add_8xi32, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; 
SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %sub_16xi32, <16 x i32> %add_16xi32, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %sub_2xi64, <2 x i64> %add_2xi64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %sub_4xi64, <4 x i64> %add_4xi64, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %sub_8xi64, <8 x i64> %add_8xi64, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %sub_2xf16, <2 x half> %add_2xf16, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector 
<4 x half> %sub_4xf16, <4 x half> %add_4xf16, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %sub_8xf16, <8 x half> %add_8xf16, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %sub_16xf16, <16 x half> %add_16xf16, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %sub_32xf16, <32 x half> %add_32xf16, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %sub_2xf32, <2 x float> %add_2xf32, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %sub_4xf32, <4 x float> %add_4xf32, <4 x 
i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %sub_8xf32, <8 x float> %add_8xf32, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %sub_16xf32, <16 x float> %add_16xf32, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %sub_2xf64, <2 x double> %add_2xf64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %sub_4xf64, <4 x double> %add_4xf64, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %sub_8xf64, <8 x double> %add_8xf64, <8 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void +; +; SSE2-LABEL: 'shuffle_addsub' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %sub_2xi8, <2 x i8> %add_2xi8, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %sub_4xi8, <4 x i8> %add_4xi8, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %sub_8xi8, <8 x i8> %add_8xi8, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %sub_16xi8, <16 x i8> %add_16xi8, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %sub_32xi8, <32 x i8> %add_32xi8, <32 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %sub_64xi8, <64 x i8> %add_64xi8, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %sub_2xi16, <2 x i16> %add_2xi16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %sub_4xi16, <4 x i16> %add_4xi16, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %sub_8xi16, <8 x i16> %add_8xi16, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %sub_16xi16, <16 x i16> %add_16xi16, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %sub_32xi16, <32 x i16> %add_32xi16, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %sub_2xi32, <2 x i32> %add_2xi32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %sub_4xi32, <4 x i32> %add_4xi32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %sub_8xi32, <8 x i32> %add_8xi32, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %sub_16xi32, <16 x i32> %add_16xi32, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 
= sub <2 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %sub_2xi64, <2 x i64> %add_2xi64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %sub_4xi64, <4 x i64> %add_4xi64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %sub_8xi64, <8 x i64> %add_8xi64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %sub_2xf16, <2 x half> %add_2xf16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %sub_4xf16, <4 x half> %add_4xf16, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %sub_8xf16, <8 x half> %add_8xf16, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %sub_16xf16, <16 x half> %add_16xf16, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %sub_32xf16, <32 x half> %add_32xf16, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %sub_2xf32, <2 x float> %add_2xf32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %sub_4xf32, <4 x float> %add_4xf32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = 
shufflevector <8 x float> %sub_8xf32, <8 x float> %add_8xf32, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %sub_16xf32, <16 x float> %add_16xf32, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %sub_2xf64, <2 x double> %add_2xf64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %sub_4xf64, <4 x double> %add_4xf64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %sub_8xf64, <8 x double> %add_8xf64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'shuffle_addsub' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %sub_2xi8, <2 x i8> %add_2xi8, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %sub_4xi8, <4 x i8> %add_4xi8, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %sub_8xi8, <8 x i8> %add_8xi8, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %sub_16xi8, <16 x i8> %add_16xi8, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %sub_32xi8, <32 x i8> %add_32xi8, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> 
%sub_64xi8, <64 x i8> %add_64xi8, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %sub_2xi16, <2 x i16> %add_2xi16, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %sub_4xi16, <4 x i16> %add_4xi16, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %sub_8xi16, <8 x i16> %add_8xi16, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %sub_16xi16, <16 x i16> %add_16xi16, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %sub_32xi16, <32 x i16> %add_32xi16, <32 x i32> +; SSE3-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %sub_2xi32, <2 x i32> %add_2xi32, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %sub_4xi32, <4 x i32> %add_4xi32, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %sub_8xi32, <8 x i32> %add_8xi32, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %sub_16xi32, <16 x i32> %add_16xi32, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %sub_2xi64, <2 x i64> %add_2xi64, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %sub_4xi64, <4 x i64> %add_4xi64, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %sub_8xi64, <8 x i64> %add_8xi64, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %sub_2xf16, <2 x half> %add_2xf16, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %sub_4xf16, <4 x half> %add_4xf16, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %sub_8xf16, <8 x half> %add_8xf16, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: 
%sub_16xf16 = fsub <16 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %sub_16xf16, <16 x half> %add_16xf16, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %sub_32xf16, <32 x half> %add_32xf16, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %sub_2xf32, <2 x float> %add_2xf32, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %sub_4xf32, <4 x float> %add_4xf32, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %sub_8xf32, <8 x float> %add_8xf32, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> 
undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %sub_16xf32, <16 x float> %add_16xf32, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %sub_2xf64, <2 x double> %add_2xf64, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %sub_4xf64, <4 x double> %add_4xf64, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %sub_8xf64, <8 x double> %add_8xf64, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'shuffle_addsub' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %sub_2xi8, <2 x i8> %add_2xi8, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %sub_4xi8, <4 x i8> %add_4xi8, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %sub_8xi8, <8 x i8> %add_8xi8, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %sub_16xi8, <16 x i8> %add_16xi8, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %sub_32xi8, <32 x i8> %add_32xi8, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %sub_64xi8, <64 x i8> %add_64xi8, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %addsub_2xi16 = shufflevector <2 x i16> %sub_2xi16, <2 x i16> %add_2xi16, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %sub_4xi16, <4 x i16> %add_4xi16, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %sub_8xi16, <8 x i16> %add_8xi16, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %sub_16xi16, <16 x i16> %add_16xi16, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %sub_32xi16, <32 x i16> %add_32xi16, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %sub_2xi32, <2 x i32> %add_2xi32, <2 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %sub_4xi32, <4 x i32> %add_4xi32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %sub_8xi32, <8 x i32> %add_8xi32, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %sub_16xi32, <16 x i32> %add_16xi32, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %sub_2xi64, <2 x i64> %add_2xi64, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %sub_4xi64, <4 x i64> %add_4xi64, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> 
undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %sub_8xi64, <8 x i64> %add_8xi64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %sub_2xf16, <2 x half> %add_2xf16, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %sub_4xf16, <4 x half> %add_4xf16, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %sub_8xf16, <8 x half> %add_8xf16, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %sub_16xf16, <16 x half> %add_16xf16, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for 
instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %sub_32xf16, <32 x half> %add_32xf16, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %sub_2xf32, <2 x float> %add_2xf32, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %sub_4xf32, <4 x float> %add_4xf32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %sub_8xf32, <8 x float> %add_8xf32, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %sub_16xf32, <16 x float> %add_16xf32, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> 
undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %sub_2xf64, <2 x double> %add_2xf64, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %sub_4xf64, <4 x double> %add_4xf64, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %sub_8xf64, <8 x double> %add_8xf64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'shuffle_addsub' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %sub_2xi8, <2 x i8> %add_2xi8, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %sub_4xi8, <4 x i8> %add_4xi8, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%sub_8xi8 = sub <8 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %sub_8xi8, <8 x i8> %add_8xi8, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %sub_16xi8, <16 x i8> %add_16xi8, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %sub_32xi8, <32 x i8> %add_32xi8, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %sub_64xi8, <64 x i8> %add_64xi8, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %sub_2xi16, <2 x i16> %add_2xi16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %addsub_4xi16 = shufflevector <4 x i16> %sub_4xi16, <4 x i16> %add_4xi16, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %sub_8xi16, <8 x i16> %add_8xi16, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %sub_16xi16, <16 x i16> %add_16xi16, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %sub_32xi16, <32 x i16> %add_32xi16, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %sub_2xi32, <2 x i32> %add_2xi32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %sub_4xi32, <4 x i32> 
%add_4xi32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %sub_8xi32, <8 x i32> %add_8xi32, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %sub_16xi32, <16 x i32> %add_16xi32, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %sub_2xi64, <2 x i64> %add_2xi64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %sub_4xi64, <4 x i64> %add_4xi64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %sub_8xi64, <8 x i64> %add_8xi64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %add_2xf16 = fadd <2 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %sub_2xf16, <2 x half> %add_2xf16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %sub_4xf16, <4 x half> %add_4xf16, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %sub_8xf16, <8 x half> %add_8xf16, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %sub_16xf16, <16 x half> %add_16xf16, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %sub_32xf16, <32 x half> %add_32xf16, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> 
undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %sub_2xf32, <2 x float> %add_2xf32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %sub_4xf32, <4 x float> %add_4xf32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %sub_8xf32, <8 x float> %add_8xf32, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %sub_16xf32, <16 x float> %add_16xf32, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %sub_2xf64, <2 x double> %add_2xf64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; AVX2-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %sub_4xf64, <4 x double> %add_4xf64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %sub_8xf64, <8 x double> %add_8xf64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'shuffle_addsub' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %sub_2xi8, <2 x i8> %add_2xi8, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %sub_4xi8, <4 x i8> %add_4xi8, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %sub_8xi8, <8 x i8> %add_8xi8, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x 
i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %sub_16xi8, <16 x i8> %add_16xi8, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %sub_32xi8, <32 x i8> %add_32xi8, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %sub_64xi8, <64 x i8> %add_64xi8, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %sub_2xi16, <2 x i16> %add_2xi16, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %sub_4xi16, <4 x i16> %add_4xi16, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %sub_8xi16, <8 x i16> %add_8xi16, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %sub_16xi16, <16 x i16> %add_16xi16, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %sub_32xi16, <32 x i16> %add_32xi16, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %sub_2xi32, <2 x i32> %add_2xi32, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %sub_4xi32, <4 x i32> %add_4xi32, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x 
i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %sub_8xi32, <8 x i32> %add_8xi32, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %sub_16xi32, <16 x i32> %add_16xi32, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %sub_2xi64, <2 x i64> %add_2xi64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %sub_4xi64, <4 x i64> %add_4xi64, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %sub_8xi64, <8 x i64> %add_8xi64, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %sub_2xf16, <2 x half> %add_2xf16, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %sub_4xf16, <4 x half> %add_4xf16, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %sub_8xf16, <8 x half> %add_8xf16, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %sub_16xf16, <16 x half> %add_16xf16, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %sub_32xf16, <32 x half> %add_32xf16, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %sub_2xf32, <2 x float> %add_2xf32, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %sub_4xf32, <4 x float> %add_4xf32, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %sub_8xf32, <8 x float> %add_8xf32, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %sub_16xf32, <16 x float> %add_16xf32, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %sub_2xf64, <2 x double> %add_2xf64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %sub_4xf64, <4 x double> %add_4xf64, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %sub_8xf64, <8 x double> %add_8xf64, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + + + %add_2xi8 = add <2 x i8> undef, undef + %sub_2xi8 = sub <2 x i8> undef, undef + %addsub_2xi8 = shufflevector <2 x i8> %sub_2xi8, <2 x i8> %add_2xi8, <2 x i32> + + %add_4xi8 = add <4 x i8> undef, undef + %sub_4xi8 = sub <4 x i8> undef, undef + %addsub_4xi8 = shufflevector <4 x i8> %sub_4xi8, <4 x i8> %add_4xi8, <4 x i32> + + %add_8xi8 = add <8 x i8> undef, undef + %sub_8xi8 = sub <8 x i8> undef, undef + %addsub_8xi8 = shufflevector <8 x i8> %sub_8xi8, <8 x i8> %add_8xi8, <8 x i32> + + %add_16xi8 = add <16 x i8> undef, undef + %sub_16xi8 = sub <16 x i8> undef, undef + %addsub_16xi8 = shufflevector <16 x i8> %sub_16xi8, <16 x i8> %add_16xi8, <16 x i32> + + %add_32xi8 = add <32 x i8> undef, undef + %sub_32xi8 = sub <32 x i8> undef, undef + %addsub_32xi8 = shufflevector <32 x i8> %sub_32xi8, <32 x i8> %add_32xi8, <32 x i32> + + %add_64xi8 = add <64 x i8> undef, undef + %sub_64xi8 = sub <64 x i8> undef, undef + %addsub_64xi8 = shufflevector <64 x i8> %sub_64xi8, <64 x i8> %add_64xi8, <64 x i32> + + + %add_2xi16 = add <2 x i16> undef, undef + %sub_2xi16 = sub <2 x i16> undef, undef + %addsub_2xi16 = shufflevector <2 x i16> %sub_2xi16, <2 x i16> %add_2xi16, <2 x i32> + + %add_4xi16 = add <4 x i16> undef, undef + %sub_4xi16 = sub <4 x i16> undef, undef + %addsub_4xi16 = shufflevector <4 x i16> %sub_4xi16, <4 x i16> %add_4xi16, <4 x i32> + + %add_8xi16 = add <8 x i16> undef, 
undef + %sub_8xi16 = sub <8 x i16> undef, undef + %addsub_8xi16 = shufflevector <8 x i16> %sub_8xi16, <8 x i16> %add_8xi16, <8 x i32> + + %add_16xi16 = add <16 x i16> undef, undef + %sub_16xi16 = sub <16 x i16> undef, undef + %addsub_16xi16 = shufflevector <16 x i16> %sub_16xi16, <16 x i16> %add_16xi16, <16 x i32> + + %add_32xi16 = add <32 x i16> undef, undef + %sub_32xi16 = sub <32 x i16> undef, undef + %addsub_32xi16 = shufflevector <32 x i16> %sub_32xi16, <32 x i16> %add_32xi16, <32 x i32> + + + %add_2xi32 = add <2 x i32> undef, undef + %sub_2xi32 = sub <2 x i32> undef, undef + %addsub_2xi32 = shufflevector <2 x i32> %sub_2xi32, <2 x i32> %add_2xi32, <2 x i32> + + %add_4xi32 = add <4 x i32> undef, undef + %sub_4xi32 = sub <4 x i32> undef, undef + %addsub_4xi32 = shufflevector <4 x i32> %sub_4xi32, <4 x i32> %add_4xi32, <4 x i32> + + %add_8xi32 = add <8 x i32> undef, undef + %sub_8xi32 = sub <8 x i32> undef, undef + %addsub_8xi32 = shufflevector <8 x i32> %sub_8xi32, <8 x i32> %add_8xi32, <8 x i32> + + %add_16xi32 = add <16 x i32> undef, undef + %sub_16xi32 = sub <16 x i32> undef, undef + %addsub_16xi32 = shufflevector <16 x i32> %sub_16xi32, <16 x i32> %add_16xi32, <16 x i32> + + + %add_2xi64 = add <2 x i64> undef, undef + %sub_2xi64 = sub <2 x i64> undef, undef + %addsub_2xi64 = shufflevector <2 x i64> %sub_2xi64, <2 x i64> %add_2xi64, <2 x i32> + + %add_4xi64 = add <4 x i64> undef, undef + %sub_4xi64 = sub <4 x i64> undef, undef + %addsub_4xi64 = shufflevector <4 x i64> %sub_4xi64, <4 x i64> %add_4xi64, <4 x i32> + + %add_8xi64 = add <8 x i64> undef, undef + %sub_8xi64 = sub <8 x i64> undef, undef + %addsub_8xi64 = shufflevector <8 x i64> %sub_8xi64, <8 x i64> %add_8xi64, <8 x i32> + + + %add_2xf16 = fadd <2 x half> undef, undef + %sub_2xf16 = fsub <2 x half> undef, undef + %addsub_2xf16 = shufflevector <2 x half> %sub_2xf16, <2 x half> %add_2xf16, <2 x i32> + + %add_4xf16 = fadd <4 x half> undef, undef + %sub_4xf16 = fsub <4 x half> undef, undef + 
%addsub_4xf16 = shufflevector <4 x half> %sub_4xf16, <4 x half> %add_4xf16, <4 x i32> + + %add_8xf16 = fadd <8 x half> undef, undef + %sub_8xf16 = fsub <8 x half> undef, undef + %addsub_8xf16 = shufflevector <8 x half> %sub_8xf16, <8 x half> %add_8xf16, <8 x i32> + + %add_16xf16 = fadd <16 x half> undef, undef + %sub_16xf16 = fsub <16 x half> undef, undef + %addsub_16xf16 = shufflevector <16 x half> %sub_16xf16, <16 x half> %add_16xf16, <16 x i32> + + %add_32xf16 = fadd <32 x half> undef, undef + %sub_32xf16 = fsub <32 x half> undef, undef + %addsub_32xf16 = shufflevector <32 x half> %sub_32xf16, <32 x half> %add_32xf16, <32 x i32> + + + %add_2xf32 = fadd <2 x float> undef, undef + %sub_2xf32 = fsub <2 x float> undef, undef + %addsub_2xf32 = shufflevector <2 x float> %sub_2xf32, <2 x float> %add_2xf32, <2 x i32> + + %add_4xf32 = fadd <4 x float> undef, undef + %sub_4xf32 = fsub <4 x float> undef, undef + %addsub_4xf32 = shufflevector <4 x float> %sub_4xf32, <4 x float> %add_4xf32, <4 x i32> + + %add_8xf32 = fadd <8 x float> undef, undef + %sub_8xf32 = fsub <8 x float> undef, undef + %addsub_8xf32 = shufflevector <8 x float> %sub_8xf32, <8 x float> %add_8xf32, <8 x i32> + + %add_16xf32 = fadd <16 x float> undef, undef + %sub_16xf32 = fsub <16 x float> undef, undef + %addsub_16xf32 = shufflevector <16 x float> %sub_16xf32, <16 x float> %add_16xf32, <16 x i32> + + + %add_2xf64 = fadd <2 x double> undef, undef + %sub_2xf64 = fsub <2 x double> undef, undef + %addsub_2xf64 = shufflevector <2 x double> %sub_2xf64, <2 x double> %add_2xf64, <2 x i32> + + %add_4xf64 = fadd <4 x double> undef, undef + %sub_4xf64 = fsub <4 x double> undef, undef + %addsub_4xf64 = shufflevector <4 x double> %sub_4xf64, <4 x double> %add_4xf64, <4 x i32> + + %add_8xf64 = fadd <8 x double> undef, undef + %sub_8xf64 = fsub <8 x double> undef, undef + %addsub_8xf64 = shufflevector <8 x double> %sub_8xf64, <8 x double> %add_8xf64, <8 x i32> + + ret void +} + + +; For completeness we also include 
sub-add patterns +define void @shuffle_subadd() { +; SSE-LABEL: 'shuffle_subadd' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %add_2xi8, <2 x i8> %sub_2xi8, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %add_4xi8, <4 x i8> %sub_4xi8, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %add_8xi8, <8 x i8> %sub_8xi8, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %add_16xi8, <16 x i8> %sub_16xi8, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %add_32xi8, <32 x i8> %sub_32xi8, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %add_64xi8 = add <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %add_64xi8, <64 x i8> %sub_64xi8, <64 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %add_2xi16, <2 x i16> %sub_2xi16, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %add_4xi16, <4 x i16> %sub_4xi16, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %add_8xi16, <8 x i16> %sub_8xi16, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %add_16xi16, <16 x i16> %sub_16xi16, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %add_32xi16, <32 x i16> %sub_32xi16, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %add_2xi32, <2 x i32> %sub_2xi32, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %add_4xi32, <4 x i32> %sub_4xi32, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %add_8xi32, <8 x i32> %sub_8xi32, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %add_16xi32, <16 x i32> %sub_16xi32, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; SSE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %add_2xi64, <2 x i64> %sub_2xi64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %add_4xi64, <4 x i64> %sub_4xi64, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %add_8xi64, <8 x i64> %sub_8xi64, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %add_2xf16, <2 x half> %sub_2xf16, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %add_4xf16, <4 x half> %sub_4xf16, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> 
%add_8xf16, <8 x half> %sub_8xf16, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %add_16xf16, <16 x half> %sub_16xf16, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %add_32xf16, <32 x half> %sub_32xf16, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %add_2xf32, <2 x float> %sub_2xf32, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %add_4xf32, <4 x float> %sub_4xf32, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %add_8xf32, <8 x float> %sub_8xf32, <8 x i32> +; 
SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %add_16xf32, <16 x float> %sub_16xf32, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %add_2xf64, <2 x double> %sub_2xf64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %add_4xf64, <4 x double> %sub_4xf64, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %add_8xf64, <8 x double> %sub_8xf64, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE2-LABEL: 'shuffle_subadd' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = 
shufflevector <2 x i8> %add_2xi8, <2 x i8> %sub_2xi8, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %add_4xi8, <4 x i8> %sub_4xi8, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %add_8xi8, <8 x i8> %sub_8xi8, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %add_16xi8, <16 x i8> %sub_16xi8, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %add_32xi8, <32 x i8> %sub_32xi8, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %add_64xi8, <64 x i8> %sub_64xi8, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %add_2xi16, <2 x i16> %sub_2xi16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %add_4xi16, <4 x i16> %sub_4xi16, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %add_8xi16, <8 x i16> %sub_8xi16, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %add_16xi16, <16 x i16> %sub_16xi16, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %add_32xi16, <32 x i16> %sub_32xi16, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; SSE2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %add_2xi32, <2 x i32> %sub_2xi32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %add_4xi32, <4 x i32> %sub_4xi32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %add_8xi32, <8 x i32> %sub_8xi32, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %add_16xi32, <16 x i32> %sub_16xi32, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %add_2xi64, <2 x i64> %sub_2xi64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> 
undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %add_4xi64, <4 x i64> %sub_4xi64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %add_8xi64, <8 x i64> %sub_8xi64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %add_2xf16, <2 x half> %sub_2xf16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %add_4xf16, <4 x half> %sub_4xf16, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %add_8xf16, <8 x half> %sub_8xf16, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %addsub_16xf16 = shufflevector <16 x half> %add_16xf16, <16 x half> %sub_16xf16, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %add_32xf16, <32 x half> %sub_32xf16, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %add_2xf32, <2 x float> %sub_2xf32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %add_4xf32, <4 x float> %sub_4xf32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %add_8xf32, <8 x float> %sub_8xf32, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = 
shufflevector <16 x float> %add_16xf32, <16 x float> %sub_16xf32, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %add_2xf64, <2 x double> %sub_2xf64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %add_4xf64, <4 x double> %sub_4xf64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %add_8xf64, <8 x double> %sub_8xf64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'shuffle_subadd' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %add_2xi8, <2 x i8> %sub_2xi8, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found 
an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %add_4xi8, <4 x i8> %sub_4xi8, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %add_8xi8, <8 x i8> %sub_8xi8, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %add_16xi8, <16 x i8> %sub_16xi8, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %add_32xi8, <32 x i8> %sub_32xi8, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %add_64xi8, <64 x i8> %sub_64xi8, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %add_2xi16, <2 x i16> 
%sub_2xi16, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %add_4xi16, <4 x i16> %sub_4xi16, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %add_8xi16, <8 x i16> %sub_8xi16, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %add_16xi16, <16 x i16> %sub_16xi16, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %add_32xi16, <32 x i16> %sub_32xi16, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %add_2xi32, <2 x i32> %sub_2xi32, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %add_4xi32 = add <4 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %add_4xi32, <4 x i32> %sub_4xi32, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %add_8xi32, <8 x i32> %sub_8xi32, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %add_16xi32, <16 x i32> %sub_16xi32, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %add_2xi64, <2 x i64> %sub_2xi64, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %add_4xi64, <4 x i64> %sub_4xi64, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %add_8xi64, <8 x i64> %sub_8xi64, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %add_2xf16, <2 x half> %sub_2xf16, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %add_4xf16, <4 x half> %sub_4xf16, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %add_8xf16, <8 x half> %sub_8xf16, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %add_16xf16, <16 x half> %sub_16xf16, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = 
fsub <32 x half> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %add_32xf16, <32 x half> %sub_32xf16, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %add_2xf32, <2 x float> %sub_2xf32, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %add_4xf32, <4 x float> %sub_4xf32, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %add_8xf32, <8 x float> %sub_8xf32, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %add_16xf32, <16 x float> %sub_16xf32, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %add_2xf64, <2 x double> %sub_2xf64, <2 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %add_4xf64, <4 x double> %sub_4xf64, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %add_8xf64, <8 x double> %sub_8xf64, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'shuffle_subadd' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %add_2xi8, <2 x i8> %sub_2xi8, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %add_4xi8, <4 x i8> %sub_4xi8, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x 
i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %add_8xi8, <8 x i8> %sub_8xi8, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %add_16xi8, <16 x i8> %sub_16xi8, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %add_32xi8, <32 x i8> %sub_32xi8, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %add_64xi8, <64 x i8> %sub_64xi8, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %add_2xi16, <2 x i16> %sub_2xi16, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = 
shufflevector <4 x i16> %add_4xi16, <4 x i16> %sub_4xi16, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %add_8xi16, <8 x i16> %sub_8xi16, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %add_16xi16, <16 x i16> %sub_16xi16, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %add_32xi16, <32 x i16> %sub_32xi16, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %add_2xi32, <2 x i32> %sub_2xi32, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %add_4xi32, <4 x i32> %sub_4xi32, <4 x i32> +; AVX-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %add_8xi32, <8 x i32> %sub_8xi32, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %add_16xi32, <16 x i32> %sub_16xi32, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %add_2xi64, <2 x i64> %sub_2xi64, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %add_4xi64, <4 x i64> %sub_4xi64, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %add_8xi64, <8 x i64> %sub_8xi64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; AVX-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %add_2xf16, <2 x half> %sub_2xf16, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %add_4xf16, <4 x half> %sub_4xf16, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %add_8xf16, <8 x half> %sub_8xf16, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %add_16xf16, <16 x half> %sub_16xf16, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %add_32xf16, <32 x half> %sub_32xf16, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%sub_2xf32 = fsub <2 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %add_2xf32, <2 x float> %sub_2xf32, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %add_4xf32, <4 x float> %sub_4xf32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %add_8xf32, <8 x float> %sub_8xf32, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %add_16xf32, <16 x float> %sub_16xf32, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %add_2xf64, <2 x double> %sub_2xf64, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; 
AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %add_4xf64, <4 x double> %sub_4xf64, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %add_8xf64, <8 x double> %sub_8xf64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'shuffle_subadd' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %add_2xi8, <2 x i8> %sub_2xi8, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %add_4xi8, <4 x i8> %sub_4xi8, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %add_8xi8, <8 x i8> %sub_8xi8, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, 
undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %add_16xi8, <16 x i8> %sub_16xi8, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %add_32xi8, <32 x i8> %sub_32xi8, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %add_64xi8, <64 x i8> %sub_64xi8, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %add_2xi16, <2 x i16> %sub_2xi16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %add_4xi16, <4 x i16> %sub_4xi16, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = 
shufflevector <8 x i16> %add_8xi16, <8 x i16> %sub_8xi16, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %add_16xi16, <16 x i16> %sub_16xi16, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %add_32xi16, <32 x i16> %sub_32xi16, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %add_2xi32, <2 x i32> %sub_2xi32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %add_4xi32, <4 x i32> %sub_4xi32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %add_8xi32, <8 x i32> %sub_8xi32, <8 x i32> +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %add_16xi32, <16 x i32> %sub_16xi32, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %add_2xi64, <2 x i64> %sub_2xi64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %add_4xi64, <4 x i64> %sub_4xi64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %add_8xi64, <8 x i64> %sub_8xi64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %add_2xf16, <2 x half> %sub_2xf16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = 
fadd <4 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %add_4xf16, <4 x half> %sub_4xf16, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %add_8xf16, <8 x half> %sub_8xf16, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %add_16xf16, <16 x half> %sub_16xf16, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %add_32xf16, <32 x half> %sub_32xf16, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %add_2xf32, <2 x float> %sub_2xf32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %add_4xf32, <4 x float> %sub_4xf32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %add_8xf32, <8 x float> %sub_8xf32, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %add_16xf32, <16 x float> %sub_16xf32, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %add_2xf64, <2 x double> %sub_2xf64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %add_4xf64, <4 x double> %sub_4xf64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; AVX2-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %add_8xf64, <8 x double> %sub_8xf64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'shuffle_subadd' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi8 = add <2 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi8 = sub <2 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi8 = shufflevector <2 x i8> %add_2xi8, <2 x i8> %sub_2xi8, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi8 = add <4 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi8 = sub <4 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi8 = shufflevector <4 x i8> %add_4xi8, <4 x i8> %sub_4xi8, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi8 = add <8 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi8 = sub <8 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi8 = shufflevector <8 x i8> %add_8xi8, <8 x i8> %sub_8xi8, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_16xi8 = add <16 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_16xi8 = sub <16 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_16xi8 = shufflevector <16 x i8> %add_16xi8, <16 x i8> %sub_16xi8, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_32xi8 = add <32 x i8> undef, undef +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_32xi8 = sub <32 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_32xi8 = shufflevector <32 x i8> %add_32xi8, <32 x i8> %sub_32xi8, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_64xi8 = add <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_64xi8 = sub <64 x i8> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_64xi8 = shufflevector <64 x i8> %add_64xi8, <64 x i8> %sub_64xi8, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi16 = add <2 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi16 = sub <2 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_2xi16 = shufflevector <2 x i16> %add_2xi16, <2 x i16> %sub_2xi16, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi16 = add <4 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi16 = sub <4 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_4xi16 = shufflevector <4 x i16> %add_4xi16, <4 x i16> %sub_4xi16, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_8xi16 = add <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_8xi16 = sub <8 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %addsub_8xi16 = shufflevector <8 x i16> %add_8xi16, <8 x i16> %sub_8xi16, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_16xi16 = add <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %sub_16xi16 = sub <16 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %addsub_16xi16 = shufflevector <16 x i16> %add_16xi16, <16 x i16> %sub_16xi16, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_32xi16 = add <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_32xi16 = sub <32 x i16> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %addsub_32xi16 = shufflevector <32 x i16> %add_32xi16, <32 x i16> %sub_32xi16, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi32 = add <2 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi32 = sub <2 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xi32 = shufflevector <2 x i32> %add_2xi32, <2 x i32> %sub_2xi32, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_4xi32 = add <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_4xi32 = sub <4 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi32 = shufflevector <4 x i32> %add_4xi32, <4 x i32> %sub_4xi32, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_8xi32 = add <8 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_8xi32 = sub <8 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi32 = shufflevector <8 x i32> %add_8xi32, <8 x i32> %sub_8xi32, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_16xi32 = add <16 x i32> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_16xi32 = sub <16 x i32> undef, 
undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xi32 = shufflevector <16 x i32> %add_16xi32, <16 x i32> %sub_16xi32, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_2xi64 = add <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_2xi64 = sub <2 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xi64 = shufflevector <2 x i64> %add_2xi64, <2 x i64> %sub_2xi64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xi64 = add <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xi64 = sub <4 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xi64 = shufflevector <4 x i64> %add_4xi64, <4 x i64> %sub_4xi64, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xi64 = add <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xi64 = sub <8 x i64> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xi64 = shufflevector <8 x i64> %add_8xi64, <8 x i64> %sub_8xi64, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_2xf16 = fadd <2 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_2xf16 = fsub <2 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_2xf16 = shufflevector <2 x half> %add_2xf16, <2 x half> %sub_2xf16, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_4xf16 = fadd <4 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_4xf16 = fsub <4 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %addsub_4xf16 = shufflevector <4 x half> %add_4xf16, <4 x half> %sub_4xf16, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %add_8xf16 = fadd <8 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sub_8xf16 = fsub <8 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_8xf16 = shufflevector <8 x half> %add_8xf16, <8 x half> %sub_8xf16, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %add_16xf16 = fadd <16 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %sub_16xf16 = fsub <16 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_16xf16 = shufflevector <16 x half> %add_16xf16, <16 x half> %sub_16xf16, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %add_32xf16 = fadd <32 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %sub_32xf16 = fsub <32 x half> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addsub_32xf16 = shufflevector <32 x half> %add_32xf16, <32 x half> %sub_32xf16, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf32 = fadd <2 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf32 = fsub <2 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_2xf32 = shufflevector <2 x float> %add_2xf32, <2 x float> %sub_2xf32, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_4xf32 = fadd <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_4xf32 = fsub <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %addsub_4xf32 = shufflevector <4 x float> %add_4xf32, <4 x float> %sub_4xf32, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_8xf32 = fadd <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_8xf32 = fsub <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %addsub_8xf32 = shufflevector <8 x float> %add_8xf32, <8 x float> %sub_8xf32, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_16xf32 = fadd <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_16xf32 = fsub <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %addsub_16xf32 = shufflevector <16 x float> %add_16xf32, <16 x float> %sub_16xf32, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_2xf64 = fadd <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sub_2xf64 = fsub <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addsub_2xf64 = shufflevector <2 x double> %add_2xf64, <2 x double> %sub_2xf64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %add_4xf64 = fadd <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sub_4xf64 = fsub <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %addsub_4xf64 = shufflevector <4 x double> %add_4xf64, <4 x double> %sub_4xf64, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %add_8xf64 = fadd <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sub_8xf64 = fsub <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %addsub_8xf64 = shufflevector <8 x double> %add_8xf64, <8 x double> %sub_8xf64, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %add_2xi8 = add <2 x i8> undef, undef + %sub_2xi8 = sub <2 x i8> undef, undef + %addsub_2xi8 = shufflevector <2 x i8> %add_2xi8, <2 x i8> %sub_2xi8, <2 x i32> + + %add_4xi8 = add <4 x i8> undef, undef + %sub_4xi8 = sub <4 x i8> undef, undef + %addsub_4xi8 = shufflevector <4 x i8> %add_4xi8, <4 x i8> %sub_4xi8, <4 x i32> + + %add_8xi8 = add <8 x i8> undef, undef + %sub_8xi8 = sub <8 x i8> undef, undef + %addsub_8xi8 = shufflevector <8 x i8> %add_8xi8, <8 x i8> %sub_8xi8, <8 x i32> + + %add_16xi8 = add <16 x i8> undef, undef + %sub_16xi8 = sub <16 x i8> undef, undef + %addsub_16xi8 = shufflevector <16 x i8> %add_16xi8, <16 x i8> %sub_16xi8, <16 x i32> + + %add_32xi8 = add <32 x i8> undef, undef + %sub_32xi8 = sub <32 x i8> undef, undef + %addsub_32xi8 = shufflevector <32 x i8> %add_32xi8, <32 x i8> %sub_32xi8, <32 x i32> + + %add_64xi8 = add <64 x i8> undef, undef + %sub_64xi8 = sub <64 x i8> undef, undef + %addsub_64xi8 = shufflevector <64 x i8> %add_64xi8, <64 x i8> %sub_64xi8, <64 x i32> + + + %add_2xi16 = add <2 x i16> undef, undef + %sub_2xi16 = sub <2 x i16> undef, undef + %addsub_2xi16 = shufflevector <2 x i16> %add_2xi16, <2 x i16> %sub_2xi16, <2 x i32> + + %add_4xi16 = add <4 x i16> undef, undef + %sub_4xi16 = sub <4 x i16> undef, undef + %addsub_4xi16 = shufflevector <4 x i16> %add_4xi16, <4 x i16> %sub_4xi16, <4 x i32> + + %add_8xi16 = add <8 x i16> undef, undef + %sub_8xi16 = sub <8 x i16> undef, undef + %addsub_8xi16 = shufflevector <8 x i16> %add_8xi16, <8 x i16> %sub_8xi16, <8 x i32> + + %add_16xi16 = add <16 x i16> undef, undef + %sub_16xi16 = sub <16 x i16> undef, undef + %addsub_16xi16 = shufflevector <16 x i16> %add_16xi16, <16 x i16> %sub_16xi16, <16 x i32> + + %add_32xi16 = add <32 x i16> undef, undef + %sub_32xi16 = sub <32 x i16> undef, undef + 
%addsub_32xi16 = shufflevector <32 x i16> %add_32xi16, <32 x i16> %sub_32xi16, <32 x i32> + + + %add_2xi32 = add <2 x i32> undef, undef + %sub_2xi32 = sub <2 x i32> undef, undef + %addsub_2xi32 = shufflevector <2 x i32> %add_2xi32, <2 x i32> %sub_2xi32, <2 x i32> + + %add_4xi32 = add <4 x i32> undef, undef + %sub_4xi32 = sub <4 x i32> undef, undef + %addsub_4xi32 = shufflevector <4 x i32> %add_4xi32, <4 x i32> %sub_4xi32, <4 x i32> + + %add_8xi32 = add <8 x i32> undef, undef + %sub_8xi32 = sub <8 x i32> undef, undef + %addsub_8xi32 = shufflevector <8 x i32> %add_8xi32, <8 x i32> %sub_8xi32, <8 x i32> + + %add_16xi32 = add <16 x i32> undef, undef + %sub_16xi32 = sub <16 x i32> undef, undef + %addsub_16xi32 = shufflevector <16 x i32> %add_16xi32, <16 x i32> %sub_16xi32, <16 x i32> + + + %add_2xi64 = add <2 x i64> undef, undef + %sub_2xi64 = sub <2 x i64> undef, undef + %addsub_2xi64 = shufflevector <2 x i64> %add_2xi64, <2 x i64> %sub_2xi64, <2 x i32> + + %add_4xi64 = add <4 x i64> undef, undef + %sub_4xi64 = sub <4 x i64> undef, undef + %addsub_4xi64 = shufflevector <4 x i64> %add_4xi64, <4 x i64> %sub_4xi64, <4 x i32> + + %add_8xi64 = add <8 x i64> undef, undef + %sub_8xi64 = sub <8 x i64> undef, undef + %addsub_8xi64 = shufflevector <8 x i64> %add_8xi64, <8 x i64> %sub_8xi64, <8 x i32> + + + %add_2xf16 = fadd <2 x half> undef, undef + %sub_2xf16 = fsub <2 x half> undef, undef + %addsub_2xf16 = shufflevector <2 x half> %add_2xf16, <2 x half> %sub_2xf16, <2 x i32> + + %add_4xf16 = fadd <4 x half> undef, undef + %sub_4xf16 = fsub <4 x half> undef, undef + %addsub_4xf16 = shufflevector <4 x half> %add_4xf16, <4 x half> %sub_4xf16, <4 x i32> + + %add_8xf16 = fadd <8 x half> undef, undef + %sub_8xf16 = fsub <8 x half> undef, undef + %addsub_8xf16 = shufflevector <8 x half> %add_8xf16, <8 x half> %sub_8xf16, <8 x i32> + + %add_16xf16 = fadd <16 x half> undef, undef + %sub_16xf16 = fsub <16 x half> undef, undef + %addsub_16xf16 = shufflevector <16 x half> %add_16xf16, <16 
x half> %sub_16xf16, <16 x i32> + + %add_32xf16 = fadd <32 x half> undef, undef + %sub_32xf16 = fsub <32 x half> undef, undef + %addsub_32xf16 = shufflevector <32 x half> %add_32xf16, <32 x half> %sub_32xf16, <32 x i32> + + + %add_2xf32 = fadd <2 x float> undef, undef + %sub_2xf32 = fsub <2 x float> undef, undef + %addsub_2xf32 = shufflevector <2 x float> %add_2xf32, <2 x float> %sub_2xf32, <2 x i32> + + %add_4xf32 = fadd <4 x float> undef, undef + %sub_4xf32 = fsub <4 x float> undef, undef + %addsub_4xf32 = shufflevector <4 x float> %add_4xf32, <4 x float> %sub_4xf32, <4 x i32> + + %add_8xf32 = fadd <8 x float> undef, undef + %sub_8xf32 = fsub <8 x float> undef, undef + %addsub_8xf32 = shufflevector <8 x float> %add_8xf32, <8 x float> %sub_8xf32, <8 x i32> + + %add_16xf32 = fadd <16 x float> undef, undef + %sub_16xf32 = fsub <16 x float> undef, undef + %addsub_16xf32 = shufflevector <16 x float> %add_16xf32, <16 x float> %sub_16xf32, <16 x i32> + + + %add_2xf64 = fadd <2 x double> undef, undef + %sub_2xf64 = fsub <2 x double> undef, undef + %addsub_2xf64 = shufflevector <2 x double> %add_2xf64, <2 x double> %sub_2xf64, <2 x i32> + + %add_4xf64 = fadd <4 x double> undef, undef + %sub_4xf64 = fsub <4 x double> undef, undef + %addsub_4xf64 = shufflevector <4 x double> %add_4xf64, <4 x double> %sub_4xf64, <4 x i32> + + %add_8xf64 = fadd <8 x double> undef, undef + %sub_8xf64 = fsub <8 x double> undef, undef + %addsub_8xf64 = shufflevector <8 x double> %add_8xf64, <8 x double> %sub_8xf64, <8 x i32> + + ret void +}