diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -118,7 +118,9 @@ unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const { bool Vector = (ClassID == 1); - if (Vector && !ST->hasSSE1()) + // Avoid auto-vectorizing when compiling for SSE1 only: without SSE2, codegen + // would have to undo any integer and v2f64 vectorization. + if (Vector && !ST->hasSSE2()) return 0; if (ST->is64Bit()) { @@ -136,7 +138,9 @@ return 512; if (ST->hasAVX() && PreferVectorWidth >= 256) return 256; - if (ST->hasSSE1() && PreferVectorWidth >= 128) + // Avoid auto-vectorizing when compiling for SSE1 only: without SSE2, codegen + // would have to undo any integer and v2f64 vectorization. + if (ST->hasSSE2() && PreferVectorWidth >= 128) return 128; return 0; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll @@ -7,6 +7,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW +; Verify that we don't vectorize with SSE1 only. 
+; RUN: opt < %s -mtriple=i686-unknown -mattr=sse -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=X86-SSE1 + ; ; 128-bit Vectors ; @@ -19,6 +22,17 @@ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1 ; CHECK-NEXT: ret <2 x double> [[R1]] +; +; X86-SSE1-LABEL: @buildvector_add_2f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[C0:%.*]] = fadd double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fadd double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: ret <2 x double> [[R1]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 @@ -39,6 +53,17 @@ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1 ; CHECK-NEXT: ret <2 x double> [[R1]] +; +; X86-SSE1-LABEL: @buildvector_sub_2f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[C0:%.*]] = fsub double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fsub double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: ret <2 x double> 
[[R1]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 @@ -59,6 +84,17 @@ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1 ; CHECK-NEXT: ret <2 x double> [[R1]] +; +; X86-SSE1-LABEL: @buildvector_mul_2f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[C0:%.*]] = fmul double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fmul double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: ret <2 x double> [[R1]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 @@ -106,6 +142,17 @@ ; AVX512-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; AVX512-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[TMP3]], i32 1 ; AVX512-NEXT: ret <2 x double> [[R1]] +; +; X86-SSE1-LABEL: @buildvector_div_2f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: ret <2 x double> [[R1]] ; %a0 = extractelement <2 x 
double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 @@ -130,6 +177,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[TMP5]], i32 3 ; CHECK-NEXT: ret <4 x float> [[R3]] +; +; X86-SSE1-LABEL: @buildvector_add_4f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fadd float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fadd float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fadd float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fadd float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x float> [[R3]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 @@ -162,6 +228,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[TMP5]], i32 3 ; CHECK-NEXT: ret <4 x float> [[R3]] +; +; X86-SSE1-LABEL: @buildvector_sub_4f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; 
X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fsub float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fsub float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fsub float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fsub float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x float> [[R3]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 @@ -194,6 +279,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[TMP5]], i32 3 ; CHECK-NEXT: ret <4 x float> [[R3]] +; +; X86-SSE1-LABEL: @buildvector_mul_4f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fmul float 
[[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fmul float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fmul float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fmul float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x float> [[R3]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 @@ -226,6 +330,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[TMP5]], i32 3 ; CHECK-NEXT: ret <4 x float> [[R3]] +; +; X86-SSE1-LABEL: @buildvector_div_4f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fdiv float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fdiv float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fdiv float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fdiv float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] 
= insertelement <4 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x float> [[R3]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 @@ -262,6 +385,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 ; CHECK-NEXT: ret <4 x double> [[R3]] +; +; X86-SSE1-LABEL: @buildvector_add_4f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fadd double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fadd double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fadd double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fadd double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 @@ -294,6 +436,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 ; CHECK-NEXT: ret <4 x double> [[R3]] +; +; X86-SSE1-LABEL: 
@buildvector_sub_4f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fsub double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fsub double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fsub double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fsub double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 @@ -326,6 +487,25 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 ; CHECK-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 ; CHECK-NEXT: ret <4 x double> [[R3]] +; +; X86-SSE1-LABEL: @buildvector_mul_4f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x 
double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fmul double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fmul double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fmul double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fmul double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 @@ -401,6 +581,25 @@ ; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3 ; AVX512-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3 ; AVX512-NEXT: ret <4 x double> [[R3]] +; +; X86-SSE1-LABEL: @buildvector_div_4f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fdiv double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fdiv double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[R0:%.*]] = 
insertelement <4 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: ret <4 x double> [[R3]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 @@ -441,6 +640,41 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] +; +; X86-SSE1-LABEL: @buildvector_add_8f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[C0:%.*]] = fadd float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fadd float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fadd 
float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fadd float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fadd float [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fadd float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fadd float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fadd float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 @@ -497,6 +731,41 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] +; +; X86-SSE1-LABEL: @buildvector_sub_8f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <8 x 
float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[C0:%.*]] = fsub float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fsub float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fsub float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fsub float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fsub float [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fsub float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fsub float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fsub float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 @@ -553,6 +822,41 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] 
+; +; X86-SSE1-LABEL: @buildvector_mul_8f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[C0:%.*]] = fmul float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fmul float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fmul float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fmul float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fmul float [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fmul float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fmul float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fmul float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3 +; 
X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 @@ -609,6 +913,41 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7 ; CHECK-NEXT: ret <8 x float> [[R7]] +; +; X86-SSE1-LABEL: @buildvector_div_8f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[C0:%.*]] = fdiv float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fdiv float [[A1]], [[B1]] +; X86-SSE1-NEXT: 
[[C2:%.*]] = fdiv float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fdiv float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fdiv float [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fdiv float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fdiv float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fdiv float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 @@ -669,6 +1008,41 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 ; CHECK-NEXT: ret <8 x double> [[R7]] +; +; X86-SSE1-LABEL: @buildvector_add_8f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 +; X86-SSE1-NEXT: 
[[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 +; X86-SSE1-NEXT: [[C0:%.*]] = fadd double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fadd double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fadd double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fadd double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fadd double [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fadd double [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fadd double [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fadd double [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 %a1 = extractelement <8 x double> %a, i32 1 @@ -725,6 +1099,41 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x 
double> [[R6]], double [[TMP9]], i32 7 ; CHECK-NEXT: ret <8 x double> [[R7]] +; +; X86-SSE1-LABEL: @buildvector_sub_8f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 +; X86-SSE1-NEXT: [[C0:%.*]] = fsub double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fsub double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fsub double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fsub double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fsub double [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fsub double [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fsub double [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fsub double [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], 
i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 %a1 = extractelement <8 x double> %a, i32 1 @@ -781,6 +1190,41 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 ; CHECK-NEXT: ret <8 x double> [[R7]] +; +; X86-SSE1-LABEL: @buildvector_mul_8f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 +; 
X86-SSE1-NEXT: [[C0:%.*]] = fmul double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fmul double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fmul double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fmul double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fmul double [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fmul double [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fmul double [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fmul double [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 %a1 = extractelement <8 x double> %a, i32 1 @@ -912,6 +1356,41 @@ ; AVX512-NEXT: [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7 ; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7 ; AVX512-NEXT: ret <8 x double> [[R7]] +; +; X86-SSE1-LABEL: @buildvector_div_8f64( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 +; 
X86-SSE1-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 +; X86-SSE1-NEXT: [[C0:%.*]] = fdiv double [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fdiv double [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fdiv double [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fdiv double [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fdiv double [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fdiv double [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fdiv double [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fdiv double [[A7]], [[B7]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7 +; X86-SSE1-NEXT: ret <8 x double> [[R7]] ; %a0 = extractelement <8 x double> %a, i32 0 %a1 = extractelement <8 x 
double> %a, i32 1 @@ -984,6 +1463,73 @@ ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 ; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] +; +; X86-SSE1-LABEL: @buildvector_add_16f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <16 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <16 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <16 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <16 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <16 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <16 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <16 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <16 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[A8:%.*]] = extractelement <16 x float> [[A]], i32 8 +; X86-SSE1-NEXT: [[A9:%.*]] = extractelement <16 x float> [[A]], i32 9 +; X86-SSE1-NEXT: [[A10:%.*]] = extractelement <16 x float> [[A]], i32 10 +; X86-SSE1-NEXT: [[A11:%.*]] = extractelement <16 x float> [[A]], i32 11 +; X86-SSE1-NEXT: [[A12:%.*]] = extractelement <16 x float> [[A]], i32 12 +; X86-SSE1-NEXT: [[A13:%.*]] = extractelement <16 x float> [[A]], i32 13 +; X86-SSE1-NEXT: [[A14:%.*]] = extractelement <16 x float> [[A]], i32 14 +; X86-SSE1-NEXT: [[A15:%.*]] = extractelement <16 x float> [[A]], i32 15 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <16 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <16 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <16 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <16 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <16 x float> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <16 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <16 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = 
extractelement <16 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[B8:%.*]] = extractelement <16 x float> [[B]], i32 8 +; X86-SSE1-NEXT: [[B9:%.*]] = extractelement <16 x float> [[B]], i32 9 +; X86-SSE1-NEXT: [[B10:%.*]] = extractelement <16 x float> [[B]], i32 10 +; X86-SSE1-NEXT: [[B11:%.*]] = extractelement <16 x float> [[B]], i32 11 +; X86-SSE1-NEXT: [[B12:%.*]] = extractelement <16 x float> [[B]], i32 12 +; X86-SSE1-NEXT: [[B13:%.*]] = extractelement <16 x float> [[B]], i32 13 +; X86-SSE1-NEXT: [[B14:%.*]] = extractelement <16 x float> [[B]], i32 14 +; X86-SSE1-NEXT: [[B15:%.*]] = extractelement <16 x float> [[B]], i32 15 +; X86-SSE1-NEXT: [[C0:%.*]] = fadd float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fadd float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fadd float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fadd float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fadd float [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fadd float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fadd float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fadd float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[C8:%.*]] = fadd float [[A8]], [[B8]] +; X86-SSE1-NEXT: [[C9:%.*]] = fadd float [[A9]], [[B9]] +; X86-SSE1-NEXT: [[C10:%.*]] = fadd float [[A10]], [[B10]] +; X86-SSE1-NEXT: [[C11:%.*]] = fadd float [[A11]], [[B11]] +; X86-SSE1-NEXT: [[C12:%.*]] = fadd float [[A12]], [[B12]] +; X86-SSE1-NEXT: [[C13:%.*]] = fadd float [[A13]], [[B13]] +; X86-SSE1-NEXT: [[C14:%.*]] = fadd float [[A14]], [[B14]] +; X86-SSE1-NEXT: [[C15:%.*]] = fadd float [[A15]], [[B15]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4 +; 
X86-SSE1-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8 +; X86-SSE1-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 +; X86-SSE1-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10 +; X86-SSE1-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11 +; X86-SSE1-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12 +; X86-SSE1-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13 +; X86-SSE1-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14 +; X86-SSE1-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15 +; X86-SSE1-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 %a1 = extractelement <16 x float> %a, i32 1 @@ -1088,6 +1634,73 @@ ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 ; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] +; +; X86-SSE1-LABEL: @buildvector_sub_16f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <16 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <16 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <16 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <16 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <16 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <16 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <16 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <16 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[A8:%.*]] = extractelement <16 
x float> [[A]], i32 8 +; X86-SSE1-NEXT: [[A9:%.*]] = extractelement <16 x float> [[A]], i32 9 +; X86-SSE1-NEXT: [[A10:%.*]] = extractelement <16 x float> [[A]], i32 10 +; X86-SSE1-NEXT: [[A11:%.*]] = extractelement <16 x float> [[A]], i32 11 +; X86-SSE1-NEXT: [[A12:%.*]] = extractelement <16 x float> [[A]], i32 12 +; X86-SSE1-NEXT: [[A13:%.*]] = extractelement <16 x float> [[A]], i32 13 +; X86-SSE1-NEXT: [[A14:%.*]] = extractelement <16 x float> [[A]], i32 14 +; X86-SSE1-NEXT: [[A15:%.*]] = extractelement <16 x float> [[A]], i32 15 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <16 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <16 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <16 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <16 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <16 x float> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <16 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <16 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <16 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[B8:%.*]] = extractelement <16 x float> [[B]], i32 8 +; X86-SSE1-NEXT: [[B9:%.*]] = extractelement <16 x float> [[B]], i32 9 +; X86-SSE1-NEXT: [[B10:%.*]] = extractelement <16 x float> [[B]], i32 10 +; X86-SSE1-NEXT: [[B11:%.*]] = extractelement <16 x float> [[B]], i32 11 +; X86-SSE1-NEXT: [[B12:%.*]] = extractelement <16 x float> [[B]], i32 12 +; X86-SSE1-NEXT: [[B13:%.*]] = extractelement <16 x float> [[B]], i32 13 +; X86-SSE1-NEXT: [[B14:%.*]] = extractelement <16 x float> [[B]], i32 14 +; X86-SSE1-NEXT: [[B15:%.*]] = extractelement <16 x float> [[B]], i32 15 +; X86-SSE1-NEXT: [[C0:%.*]] = fsub float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fsub float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fsub float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fsub float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fsub float [[A4]], [[B4]] +; 
X86-SSE1-NEXT: [[C5:%.*]] = fsub float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fsub float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fsub float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[C8:%.*]] = fsub float [[A8]], [[B8]] +; X86-SSE1-NEXT: [[C9:%.*]] = fsub float [[A9]], [[B9]] +; X86-SSE1-NEXT: [[C10:%.*]] = fsub float [[A10]], [[B10]] +; X86-SSE1-NEXT: [[C11:%.*]] = fsub float [[A11]], [[B11]] +; X86-SSE1-NEXT: [[C12:%.*]] = fsub float [[A12]], [[B12]] +; X86-SSE1-NEXT: [[C13:%.*]] = fsub float [[A13]], [[B13]] +; X86-SSE1-NEXT: [[C14:%.*]] = fsub float [[A14]], [[B14]] +; X86-SSE1-NEXT: [[C15:%.*]] = fsub float [[A15]], [[B15]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8 +; X86-SSE1-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 +; X86-SSE1-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10 +; X86-SSE1-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11 +; X86-SSE1-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12 +; X86-SSE1-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13 +; X86-SSE1-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14 +; X86-SSE1-NEXT: [[R15:%.*]] = insertelement <16 
x float> [[R14]], float [[C15]], i32 15 +; X86-SSE1-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 %a1 = extractelement <16 x float> %a, i32 1 @@ -1192,6 +1805,73 @@ ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 ; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] +; +; X86-SSE1-LABEL: @buildvector_mul_16f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <16 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <16 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <16 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <16 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <16 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <16 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = extractelement <16 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <16 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[A8:%.*]] = extractelement <16 x float> [[A]], i32 8 +; X86-SSE1-NEXT: [[A9:%.*]] = extractelement <16 x float> [[A]], i32 9 +; X86-SSE1-NEXT: [[A10:%.*]] = extractelement <16 x float> [[A]], i32 10 +; X86-SSE1-NEXT: [[A11:%.*]] = extractelement <16 x float> [[A]], i32 11 +; X86-SSE1-NEXT: [[A12:%.*]] = extractelement <16 x float> [[A]], i32 12 +; X86-SSE1-NEXT: [[A13:%.*]] = extractelement <16 x float> [[A]], i32 13 +; X86-SSE1-NEXT: [[A14:%.*]] = extractelement <16 x float> [[A]], i32 14 +; X86-SSE1-NEXT: [[A15:%.*]] = extractelement <16 x float> [[A]], i32 15 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <16 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <16 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <16 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <16 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <16 x float> [[B]], i32 4 +; X86-SSE1-NEXT: 
[[B5:%.*]] = extractelement <16 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <16 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <16 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[B8:%.*]] = extractelement <16 x float> [[B]], i32 8 +; X86-SSE1-NEXT: [[B9:%.*]] = extractelement <16 x float> [[B]], i32 9 +; X86-SSE1-NEXT: [[B10:%.*]] = extractelement <16 x float> [[B]], i32 10 +; X86-SSE1-NEXT: [[B11:%.*]] = extractelement <16 x float> [[B]], i32 11 +; X86-SSE1-NEXT: [[B12:%.*]] = extractelement <16 x float> [[B]], i32 12 +; X86-SSE1-NEXT: [[B13:%.*]] = extractelement <16 x float> [[B]], i32 13 +; X86-SSE1-NEXT: [[B14:%.*]] = extractelement <16 x float> [[B]], i32 14 +; X86-SSE1-NEXT: [[B15:%.*]] = extractelement <16 x float> [[B]], i32 15 +; X86-SSE1-NEXT: [[C0:%.*]] = fmul float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fmul float [[A1]], [[B1]] +; X86-SSE1-NEXT: [[C2:%.*]] = fmul float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fmul float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fmul float [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fmul float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fmul float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fmul float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[C8:%.*]] = fmul float [[A8]], [[B8]] +; X86-SSE1-NEXT: [[C9:%.*]] = fmul float [[A9]], [[B9]] +; X86-SSE1-NEXT: [[C10:%.*]] = fmul float [[A10]], [[B10]] +; X86-SSE1-NEXT: [[C11:%.*]] = fmul float [[A11]], [[B11]] +; X86-SSE1-NEXT: [[C12:%.*]] = fmul float [[A12]], [[B12]] +; X86-SSE1-NEXT: [[C13:%.*]] = fmul float [[A13]], [[B13]] +; X86-SSE1-NEXT: [[C14:%.*]] = fmul float [[A14]], [[B14]] +; X86-SSE1-NEXT: [[C15:%.*]] = fmul float [[A15]], [[B15]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: 
[[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8 +; X86-SSE1-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 +; X86-SSE1-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10 +; X86-SSE1-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11 +; X86-SSE1-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12 +; X86-SSE1-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13 +; X86-SSE1-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14 +; X86-SSE1-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15 +; X86-SSE1-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 %a1 = extractelement <16 x float> %a, i32 1 @@ -1296,6 +1976,73 @@ ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15 ; CHECK-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15 ; CHECK-NEXT: ret <16 x float> [[R15]] +; +; X86-SSE1-LABEL: @buildvector_div_16f32( +; X86-SSE1-NEXT: [[A0:%.*]] = extractelement <16 x float> [[A:%.*]], i32 0 +; X86-SSE1-NEXT: [[A1:%.*]] = extractelement <16 x float> [[A]], i32 1 +; X86-SSE1-NEXT: [[A2:%.*]] = extractelement <16 x float> [[A]], i32 2 +; X86-SSE1-NEXT: [[A3:%.*]] = extractelement <16 x float> [[A]], i32 3 +; X86-SSE1-NEXT: [[A4:%.*]] = extractelement <16 x float> [[A]], i32 4 +; X86-SSE1-NEXT: [[A5:%.*]] = extractelement <16 x float> [[A]], i32 5 +; X86-SSE1-NEXT: [[A6:%.*]] = 
extractelement <16 x float> [[A]], i32 6 +; X86-SSE1-NEXT: [[A7:%.*]] = extractelement <16 x float> [[A]], i32 7 +; X86-SSE1-NEXT: [[A8:%.*]] = extractelement <16 x float> [[A]], i32 8 +; X86-SSE1-NEXT: [[A9:%.*]] = extractelement <16 x float> [[A]], i32 9 +; X86-SSE1-NEXT: [[A10:%.*]] = extractelement <16 x float> [[A]], i32 10 +; X86-SSE1-NEXT: [[A11:%.*]] = extractelement <16 x float> [[A]], i32 11 +; X86-SSE1-NEXT: [[A12:%.*]] = extractelement <16 x float> [[A]], i32 12 +; X86-SSE1-NEXT: [[A13:%.*]] = extractelement <16 x float> [[A]], i32 13 +; X86-SSE1-NEXT: [[A14:%.*]] = extractelement <16 x float> [[A]], i32 14 +; X86-SSE1-NEXT: [[A15:%.*]] = extractelement <16 x float> [[A]], i32 15 +; X86-SSE1-NEXT: [[B0:%.*]] = extractelement <16 x float> [[B:%.*]], i32 0 +; X86-SSE1-NEXT: [[B1:%.*]] = extractelement <16 x float> [[B]], i32 1 +; X86-SSE1-NEXT: [[B2:%.*]] = extractelement <16 x float> [[B]], i32 2 +; X86-SSE1-NEXT: [[B3:%.*]] = extractelement <16 x float> [[B]], i32 3 +; X86-SSE1-NEXT: [[B4:%.*]] = extractelement <16 x float> [[B]], i32 4 +; X86-SSE1-NEXT: [[B5:%.*]] = extractelement <16 x float> [[B]], i32 5 +; X86-SSE1-NEXT: [[B6:%.*]] = extractelement <16 x float> [[B]], i32 6 +; X86-SSE1-NEXT: [[B7:%.*]] = extractelement <16 x float> [[B]], i32 7 +; X86-SSE1-NEXT: [[B8:%.*]] = extractelement <16 x float> [[B]], i32 8 +; X86-SSE1-NEXT: [[B9:%.*]] = extractelement <16 x float> [[B]], i32 9 +; X86-SSE1-NEXT: [[B10:%.*]] = extractelement <16 x float> [[B]], i32 10 +; X86-SSE1-NEXT: [[B11:%.*]] = extractelement <16 x float> [[B]], i32 11 +; X86-SSE1-NEXT: [[B12:%.*]] = extractelement <16 x float> [[B]], i32 12 +; X86-SSE1-NEXT: [[B13:%.*]] = extractelement <16 x float> [[B]], i32 13 +; X86-SSE1-NEXT: [[B14:%.*]] = extractelement <16 x float> [[B]], i32 14 +; X86-SSE1-NEXT: [[B15:%.*]] = extractelement <16 x float> [[B]], i32 15 +; X86-SSE1-NEXT: [[C0:%.*]] = fdiv float [[A0]], [[B0]] +; X86-SSE1-NEXT: [[C1:%.*]] = fdiv float [[A1]], [[B1]] +; 
X86-SSE1-NEXT: [[C2:%.*]] = fdiv float [[A2]], [[B2]] +; X86-SSE1-NEXT: [[C3:%.*]] = fdiv float [[A3]], [[B3]] +; X86-SSE1-NEXT: [[C4:%.*]] = fdiv float [[A4]], [[B4]] +; X86-SSE1-NEXT: [[C5:%.*]] = fdiv float [[A5]], [[B5]] +; X86-SSE1-NEXT: [[C6:%.*]] = fdiv float [[A6]], [[B6]] +; X86-SSE1-NEXT: [[C7:%.*]] = fdiv float [[A7]], [[B7]] +; X86-SSE1-NEXT: [[C8:%.*]] = fdiv float [[A8]], [[B8]] +; X86-SSE1-NEXT: [[C9:%.*]] = fdiv float [[A9]], [[B9]] +; X86-SSE1-NEXT: [[C10:%.*]] = fdiv float [[A10]], [[B10]] +; X86-SSE1-NEXT: [[C11:%.*]] = fdiv float [[A11]], [[B11]] +; X86-SSE1-NEXT: [[C12:%.*]] = fdiv float [[A12]], [[B12]] +; X86-SSE1-NEXT: [[C13:%.*]] = fdiv float [[A13]], [[B13]] +; X86-SSE1-NEXT: [[C14:%.*]] = fdiv float [[A14]], [[B14]] +; X86-SSE1-NEXT: [[C15:%.*]] = fdiv float [[A15]], [[B15]] +; X86-SSE1-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0 +; X86-SSE1-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 +; X86-SSE1-NEXT: [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2 +; X86-SSE1-NEXT: [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3 +; X86-SSE1-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4 +; X86-SSE1-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 +; X86-SSE1-NEXT: [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6 +; X86-SSE1-NEXT: [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7 +; X86-SSE1-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8 +; X86-SSE1-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 +; X86-SSE1-NEXT: [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10 +; X86-SSE1-NEXT: [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11 +; X86-SSE1-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12 +; X86-SSE1-NEXT: [[R13:%.*]] = insertelement <16 x float> 
[[R12]], float [[C13]], i32 13 +; X86-SSE1-NEXT: [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14 +; X86-SSE1-NEXT: [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15 +; X86-SSE1-NEXT: ret <16 x float> [[R15]] ; %a0 = extractelement <16 x float> %a, i32 0 %a1 = extractelement <16 x float> %a, i32 1