diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1508,7 +1508,7 @@ ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,4] -; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpbroadcastq {{.*#+}} xmm0 = [4,4] +; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; X64-AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll --- a/llvm/test/CodeGen/X86/combine-movmsk.ll +++ b/llvm/test/CodeGen/X86/combine-movmsk.ll @@ -238,10 +238,4 @@ ; SSE42-NEXT: xorl $3, %eax ; SSE42-NEXT: retq -; -; AVX-LABEL: movmskpd_pow2_mask: -; AVX: # %bb.0: -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: retq %1 = and <2 x i64> %a0, %2 = icmp eq <2 x i64> %1, zeroinitializer diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -346,5 +346,6 @@ ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1] +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm2 ; AVX-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/concat-cast.ll b/llvm/test/CodeGen/X86/concat-cast.ll --- a/llvm/test/CodeGen/X86/concat-cast.ll +++ b/llvm/test/CodeGen/X86/concat-cast.ll @@ -374,5 +374,5 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll --- a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll +++ b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll @@ -75,5 +75,7 @@ ; CHECK-LABEL: ExeDepsFix_broadcastsd: ; CHECK: ## %bb.0: -; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [2147483647,2147483647] +; CHECK-NEXT: ## xmm2 = mem[0,0] +; CHECK-NEXT: vandpd %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll --- a/llvm/test/CodeGen/X86/extractelement-fp.ll +++ b/llvm/test/CodeGen/X86/extractelement-fp.ll @@ -318,21 +318,14 @@ ; This used to crash by creating a setcc with an i64 condition on a 32-bit target. 
define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { -; X64-LABEL: extvselectsetcc_crash: -; X64: # %bb.0: -; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; X64-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] -; X64-NEXT: retq -; -; X86-LABEL: extvselectsetcc_crash: -; X86: # %bb.0: -; X86-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 -; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X86-NEXT: vandpd %xmm2, %xmm1, %xmm1 -; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X86-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] -; X86-NEXT: retl +; CHECK-LABEL: extvselectsetcc_crash: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [5.0E+0,5.0E+0] +; CHECK-NEXT: # xmm1 = mem[0,0] +; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vandpd %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] +; CHECK-NEXT: ret{{[l|q]}} %cmp = fcmp oeq <2 x double> %x, %s = select <2 x i1> %cmp, <2 x double> , <2 x double> @@ -547,5 +540,7 @@ ; X64-LABEL: fabs_v4f64: ; X64: # %bb.0: -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN] +; X64-NEXT: # xmm1 = mem[0,0] +; X64-NEXT: vandps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq @@ -557,5 +552,7 @@ ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN] +; X86-NEXT: # xmm1 = mem[0,0] +; X86-NEXT: vandps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovlps %xmm0, (%esp) ; X86-NEXT: fldl (%esp) @@ -819,6 +816,10 @@ ; X64-LABEL: copysign_v4f64: ; X64: # %bb.0: -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] +; X64-NEXT: # xmm2 = mem[0,0] +; X64-NEXT: vandps %xmm2, %xmm1, %xmm1 +; X64-NEXT: vmovddup {{.*#+}} xmm2 = [NaN,NaN] +; X64-NEXT: # xmm2 = mem[0,0] +; X64-NEXT: vandps %xmm2, %xmm0, %xmm0 ; X64-NEXT: vorps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper @@ -831,6 +832,10 @@ ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] +; X86-NEXT: # xmm2 = mem[0,0] +; X86-NEXT: vandps %xmm2, %xmm1, %xmm1 +; X86-NEXT: vmovddup {{.*#+}} xmm2 = [NaN,NaN] +; X86-NEXT: # xmm2 = mem[0,0] +; X86-NEXT: vandps %xmm2, %xmm0, %xmm0 ; X86-NEXT: vorps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovlps %xmm0, (%esp) @@ -1097,5 +1102,7 @@ ; X64-LABEL: round_v4f64: ; X64: # %bb.0: -; X64-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; X64-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; X64-NEXT: # xmm1 = mem[0,0] +; X64-NEXT: vandpd %xmm1, %xmm0, %xmm1 ; X64-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] ; X64-NEXT: # xmm2 = mem[0,0] @@ -1112,5 +1119,7 @@ ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 +; X86-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; X86-NEXT: # xmm1 = mem[0,0] +; X86-NEXT: vandpd %xmm1, %xmm0, %xmm1 ; X86-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] ; X86-NEXT: # xmm2 
= mem[0,0] diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll @@ -123,5 +123,5 @@ ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll @@ -125,5 +125,5 @@ ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll --- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll +++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll @@ -426,5 +426,5 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm1, %xmm2, %xmm4 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm5 @@ -724,5 +724,5 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm1, %xmm2, %xmm4 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm5 diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll @@ -2475,8 +2475,8 @@ ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483647,2147483647] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [2147483647,2147483647] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] ; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 @@ -2614,32 +2614,61 @@ ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i16: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] -; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; 
AVX-NEXT: jne .LBB7_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB7_3 -; AVX-NEXT: .LBB7_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: .LBB7_1: # %cond.store -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB7_4 -; AVX-NEXT: .LBB7_3: # %cond.store1 -; AVX-NEXT: vpextrw $1, %xmm0, 2(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB7_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB7_3 +; AVX1-NEXT: .LBB7_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB7_1: # %cond.store +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB7_4 +; AVX1-NEXT: .LBB7_3: # %cond.store1 +; AVX1-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [32767,32767] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB7_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB7_3 +; AVX2-NEXT: .LBB7_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB7_1: # %cond.store +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB7_4 +; AVX2-NEXT: .LBB7_3: # %cond.store1 +; AVX2-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i16: @@ -2784,31 +2813,59 @@ ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [127,127] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488] -; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne .LBB8_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB8_3 -; AVX-NEXT: .LBB8_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: .LBB8_1: # %cond.store -; AVX-NEXT: vpextrb $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB8_4 -; AVX-NEXT: .LBB8_3: # %cond.store1 -; AVX-NEXT: vpextrb $1, %xmm0, 
1(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB8_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB8_3 +; AVX1-NEXT: .LBB8_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB8_1: # %cond.store +; AVX1-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB8_4 +; AVX1-NEXT: .LBB8_3: # %cond.store1 +; AVX1-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [127,127] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB8_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB8_3 +; AVX2-NEXT: .LBB8_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB8_1: # %cond.store +; AVX2-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB8_4 +; AVX2-NEXT: .LBB8_3: # %cond.store1 +; AVX2-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i8: diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll @@ -2155,7 +2155,9 @@ ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [4294967295,4294967295] -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103] +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [4294967295,4294967295] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103] ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 @@ -2274,31 +2276,61 @@ ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i16: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [65535,65535] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 -; AVX-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343] -; AVX-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: 
vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne .LBB7_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB7_3 -; AVX-NEXT: .LBB7_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: .LBB7_1: # %cond.store -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB7_4 -; AVX-NEXT: .LBB7_3: # %cond.store1 -; AVX-NEXT: vpextrw $1, %xmm0, 2(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [65535,65535] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB7_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB7_3 +; AVX1-NEXT: .LBB7_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB7_1: # %cond.store +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB7_4 +; AVX1-NEXT: .LBB7_3: # %cond.store1 +; AVX1-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [65535,65535] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343] +; AVX2-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB7_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB7_3 +; AVX2-NEXT: .LBB7_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB7_1: # %cond.store +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB7_4 +; AVX2-NEXT: .LBB7_3: # %cond.store1 +; AVX2-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i16: @@ -2424,30 +2456,59 @@ ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [255,255] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 -; AVX-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063] -; AVX-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne .LBB8_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB8_3 -; AVX-NEXT: .LBB8_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: 
.LBB8_1: # %cond.store -; AVX-NEXT: vpextrb $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB8_4 -; AVX-NEXT: .LBB8_3: # %cond.store1 -; AVX-NEXT: vpextrb $1, %xmm0, 1(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [255,255] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB8_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB8_3 +; AVX1-NEXT: .LBB8_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB8_1: # %cond.store +; AVX1-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB8_4 +; AVX1-NEXT: .LBB8_3: # %cond.store1 +; AVX1-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [255,255] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB8_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB8_3 +; AVX2-NEXT: .LBB8_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB8_1: # %cond.store +; AVX2-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB8_4 +; AVX2-NEXT: .LBB8_3: # %cond.store1 +; AVX2-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i8: diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll --- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll @@ -976,5 +976,6 @@ ; AVX2-FALLBACK: # %bb.0: ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 @@ -993,23 +994,64 @@ ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_reg_reg: -; XOP: # %bb.0: -; XOP-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; 
XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_reg_reg: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_reg_reg: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_reg_reg: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_reg_reg: @@ -1195,9 +1237,10 @@ ; AVX2-FALLBACK-LABEL: vec128_i64_unsigned_reg_reg: ; AVX2-FALLBACK: # %bb.0: -; AVX2-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-FALLBACK-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-FALLBACK-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm5 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm5 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm5, %xmm4, %xmm5 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-FALLBACK-NEXT: vblendvpd 
%xmm2, %xmm0, %xmm1, %xmm2 @@ -1216,23 +1259,64 @@ ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_unsigned_reg_reg: -; XOP: # %bb.0: -; XOP-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_unsigned_reg_reg: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_unsigned_reg_reg: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_unsigned_reg_reg: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_unsigned_reg_reg: @@ -1422,5 
+1506,6 @@ ; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 @@ -1439,24 +1524,67 @@ ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_mem_reg: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa (%rdi), %xmm1 -; XOP-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 -; XOP-NEXT: vpsubq %xmm4, %xmm0, %xmm0 -; XOP-NEXT: vpsrlq $1, %xmm0, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0 -; XOP-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0 -; XOP-NEXT: vpsllq $32, %xmm0, %xmm0 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_mem_reg: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 +; XOP-FALLBACK-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_mem_reg: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX1-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOPAVX1-NEXT: vpsllq $32, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_mem_reg: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX2-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm0, %xmm0 +; 
XOPAVX2-NEXT: vpsrlq $1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOPAVX2-NEXT: vpsllq $32, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_mem_reg: @@ -1645,5 +1773,6 @@ ; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 @@ -1662,24 +1791,67 @@ ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_reg_mem: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa (%rdi), %xmm1 -; XOP-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_reg_mem: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 +; XOP-FALLBACK-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_reg_mem: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX1-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; 
XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_reg_mem: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_reg_mem: @@ -1872,5 +2044,6 @@ ; AVX2-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 @@ -1889,25 +2062,70 @@ ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_mem_mem: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa (%rdi), %xmm0 -; XOP-NEXT: vmovdqa (%rsi), %xmm1 -; XOP-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_mem_mem: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vmovdqa (%rdi), %xmm0 +; XOP-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1 +; XOP-FALLBACK-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_mem_mem: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa (%rdi), %xmm0 +; XOPAVX1-NEXT: vmovdqa (%rsi), %xmm1 +; XOPAVX1-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_mem_mem: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vmovdqa (%rdi), %xmm0 +; XOPAVX2-NEXT: vmovdqa (%rsi), %xmm1 +; XOPAVX2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_mem_mem: diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -121,13 +121,23 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: mul_v2i64c: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: mul_v2i64c: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [117,117] +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: mul_v2i64c: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: retq entry: %A = mul <2 x i64> %i, < i64 117, i64 117 > diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -1208,6 +1208,9 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vblendvpd %xmm2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: # xmm4 
= mem[0,0] +; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm4, %xmm3 ; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll --- a/llvm/test/CodeGen/X86/sat-add.ll +++ b/llvm/test/CodeGen/X86/sat-add.ll @@ -658,10 +658,13 @@ ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovapd {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573] -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765] +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765] ; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -727,9 +730,10 @@ ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq @@ -794,9 +798,10 @@ ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq @@ -1207,6 +1212,8 @@ ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: vpxor %xmm4, %xmm1, %xmm4 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 @@ -1276,5 +1283,5 @@ ; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpaddq 
%xmm1, %xmm0, %xmm0 @@ -1345,6 +1352,8 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: vpxor %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -1305,6 +1305,9 @@ ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vblendvpd %xmm1, %xmm2, %xmm3, %xmm2 ; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 ; AVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll @@ -907,5 +907,5 @@ ; AVX2-LABEL: v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -241,13 +241,15 @@ ; CHECK-AVX2-LABEL: t3_wide: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411] -; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 -; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2863311530,2863311530] +; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 +; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [12297829382473034411,12297829382473034411] +; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; CHECK-AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll --- a/llvm/test/CodeGen/X86/usub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll @@ -818,5 +818,5 
@@ ; AVX2-LABEL: v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll --- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll +++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll @@ -343,5 +343,5 @@ ; AVX2-LABEL: ge_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 @@ -517,5 +517,5 @@ ; AVX2-LABEL: gt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 @@ -765,5 +765,5 @@ ; AVX2-LABEL: le_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 @@ -940,5 +940,5 @@ ; AVX2-LABEL: lt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -345,13 +345,25 @@ ; SSE-NEXT: retq ; -; VEX-LABEL: fptoui_2f64_to_4i32: -; VEX: # %bb.0: -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1 -; VEX-NEXT: vpsrad $31, %xmm1, %xmm2 -; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: fptoui_2f64_to_4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX1-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: fptoui_2f64_to_4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [-2.147483648E+9,-2.147483648E+9] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vaddpd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm1, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2 +; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_4i32: @@ -397,13 +409,25 @@ ; SSE-NEXT: retq ; -; VEX-LABEL: fptoui_2f64_to_2i32: -; VEX: # %bb.0: -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1 -; VEX-NEXT: vpsrad $31, %xmm1, %xmm2 -; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: fptoui_2f64_to_2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX1-NEXT: 
vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: fptoui_2f64_to_2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [-2.147483648E+9,-2.147483648E+9] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vaddpd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm1, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2 +; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_2i32: diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -53,12 +53,21 @@ ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i32_to_2f32: -; VEX: # %bb.0: -; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] -; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vcvtpd2ps %xmm0, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i32_to_2f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i32_to_2f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i32_to_2f32: @@ -605,8 +614,12 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -668,11 +681,19 @@ ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i32_to_2f64: -; VEX: # %bb.0: -; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] -; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i32_to_2f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i32_to_2f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpbroadcastq 
{{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i32_to_2f64: @@ -1909,21 +1930,40 @@ ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i64_to_4f32: -; VEX: # %bb.0: -; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 -; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 -; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 -; VEX-NEXT: vpextrq $1, %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 -; VEX-NEXT: vmovq %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 -; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero -; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 -; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 -; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] -; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i64_to_4f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i64_to_4f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX2-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i64_to_4f32: @@ -2024,22 +2064,42 @@ ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i64_to_2f32: -; VEX: # %bb.0: -; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 -; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 -; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 -; VEX-NEXT: vpextrq $1, %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 -; VEX-NEXT: vmovq %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 -; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero -; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 -; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 -; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] -; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 -; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i64_to_2f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, 
%xmm2 +; AVX1-NEXT: vmovq %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i64_to_2f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX2-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i64_to_2f32: @@ -3277,8 +3337,12 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -3344,11 +3408,19 @@ ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_load_2i32_to_2f64: -; VEX: # %bb.0: -; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] -; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_load_2i32_to_2f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_load_2i32_to_2f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_load_2i32_to_2f64: @@ -5659,13 +5731,15 @@ ; AVX2-LABEL: PR43609: ; AVX2: # %bb.0: -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] +; AVX2-NEXT: vpbroadcastq 
{{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] ; AVX2-NEXT: vpor %xmm4, %xmm3, %xmm3 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] ; AVX2-NEXT: vpor %xmm5, %xmm0, %xmm0 -; AVX2-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: # xmm6 = mem[0,0] ; AVX2-NEXT: vsubpd %xmm6, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm0, %xmm3, %xmm0 @@ -5676,5 +5750,6 @@ ; AVX2-NEXT: vsubpd %xmm6, %xmm1, %xmm1 ; AVX2-NEXT: vaddpd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX2-NEXT: # xmm2 = mem[0,0] ; AVX2-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm2, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/vec_minmax_uint.ll b/llvm/test/CodeGen/X86/vec_minmax_uint.ll --- a/llvm/test/CodeGen/X86/vec_minmax_uint.ll +++ b/llvm/test/CodeGen/X86/vec_minmax_uint.ll @@ -72,5 +72,5 @@ ; AVX2-LABEL: max_gt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 @@ -478,5 +478,5 @@ ; AVX2-LABEL: max_ge_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 @@ -883,5 +883,5 @@ ; AVX2-LABEL: min_lt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 @@ -1291,5 +1291,5 @@ ; AVX2-LABEL: min_le_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll --- a/llvm/test/CodeGen/X86/vec_uaddo.ll +++ b/llvm/test/CodeGen/X86/vec_uaddo.ll @@ -856,14 +856,25 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: uaddo_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovdqa %xmm1, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: uaddo_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovdqa %xmm1, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: uaddo_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpaddq 
%xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: retq ; ; AVX512-LABEL: uaddo_v2i64: diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll --- a/llvm/test/CodeGen/X86/vec_usubo.ll +++ b/llvm/test/CodeGen/X86/vec_usubo.ll @@ -903,14 +903,25 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: usubo_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovdqa %xmm1, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: usubo_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovdqa %xmm1, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: usubo_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: retq ; ; AVX512-LABEL: usubo_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -471,8 +471,19 @@ ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v16i8: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v16i8: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v16i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v16i8: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ret <16 x i8> %b @@ -550,9 +561,22 @@ ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v8i16: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v8i16: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v8i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: 
vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v8i16: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ret <8 x i16> %b @@ -635,9 +659,22 @@ ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v4i32: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v4i32: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v4i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v4i32: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ret <4 x i32> %b @@ -722,9 +759,22 @@ ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v2i64: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v2i64: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v2i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v2i64: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) ret <2 x i64> %b diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -1035,14 +1035,25 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: bool_reduction_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; AVX1-LABEL: bool_reduction_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = 
[9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovmskpd %xmm0, %eax +; AVX1-NEXT: cmpb $3, %al +; AVX1-NEXT: sete %al +; AVX1-NEXT: retq +; +; AVX2-LABEL: bool_reduction_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovmskpd %xmm0, %eax +; AVX2-NEXT: cmpb $3, %al +; AVX2-NEXT: sete %al +; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -990,14 +990,25 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: bool_reduction_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: setne %al -; AVX-NEXT: retq +; AVX1-LABEL: bool_reduction_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovmskpd %xmm0, %eax +; AVX1-NEXT: testl %eax, %eax +; AVX1-NEXT: setne %al +; AVX1-NEXT: retq +; +; AVX2-LABEL: bool_reduction_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovmskpd %xmm0, %eax +; AVX2-NEXT: testl %eax, %eax +; AVX2-NEXT: setne %al +; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -85,5 +85,5 @@ ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 @@ -168,5 +168,5 @@ ; XOPAVX2-LABEL: var_funnnel_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 @@ -953,14 +953,25 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: 
splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: @@ -1024,14 +1035,25 @@ ; AVX512VLVBMI2-NEXT: retq ; -; XOP-LABEL: splatvar_funnnel_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: splatvar_funnnel_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_funnnel_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -762,5 +762,5 @@ ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm1 @@ -850,5 +850,5 @@ ; XOPAVX2-LABEL: splatvar_funnnel_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq $1, %ymm1, %ymm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -88,5 +88,5 @@ ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsllvq %xmm3, %xmm0, %xmm3 @@ -732,15 +732,27 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlq 
%xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vpsllq %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -588,5 +588,5 @@ ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3 diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -85,5 +85,5 @@ ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 @@ -169,5 +169,5 @@ ; XOPAVX2-LABEL: var_funnnel_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 @@ -1042,14 +1042,25 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 +; AVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: @@ -1114,14 +1125,25 @@ ; AVX512VLVBMI2-NEXT: retq ; -; XOP-LABEL: splatvar_funnnel_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 -; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllq $1, %xmm0, %xmm0 -; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: splatvar_funnnel_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; 
XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_funnnel_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -793,5 +793,5 @@ ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 @@ -882,5 +882,5 @@ ; XOPAVX2-LABEL: splatvar_funnnel_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -88,5 +88,5 @@ ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsrlvq %xmm3, %xmm0, %xmm3 @@ -754,15 +754,27 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -614,5 +614,5 @@ ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3 diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll --- 
a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -1491,12 +1491,13 @@ ; X64-AVX2-LABEL: mul_v2i64_neg_15_63: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553] -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 +; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551601,18446744073709551553] +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; X64-AVX2-NEXT: retq ; @@ -1552,12 +1553,13 @@ ; X64-AVX2-LABEL: mul_v2i64_neg_17_65: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551] -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 +; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551599,18446744073709551551] +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; X64-AVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll --- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll @@ -17263,5 +17263,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -17448,5 +17449,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -17635,5 +17636,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -17820,5 +17822,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -18007,5 +18009,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] +; 
AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -18192,5 +18195,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -18379,5 +18382,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -18564,5 +18568,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -18751,5 +18755,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -18936,5 +18941,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -19123,5 +19128,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -19308,5 +19314,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -19495,5 +19501,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -19680,5 +19687,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -19867,5 +19874,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -20052,5 +20060,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -20239,5 +20247,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -20424,5 +20433,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 
; AVX2-NEXT: retq @@ -20611,5 +20620,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -20796,5 +20806,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -20983,5 +20993,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -21168,5 +21179,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -21355,5 +21366,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -21540,5 +21552,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -21727,5 +21739,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -21912,5 +21925,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -22099,5 +22112,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -22284,5 +22298,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -22471,5 +22485,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -22656,5 +22671,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -22843,5 +22858,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] +; AVX2-NEXT: vpcmpgtq %xmm1, 
%xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -23028,5 +23044,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -23215,5 +23231,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -23400,5 +23417,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -23587,5 +23604,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -23772,5 +23790,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -23959,5 +23977,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -24144,5 +24163,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -24331,5 +24350,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -24516,5 +24536,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -24703,5 +24723,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -24888,5 +24909,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -25075,5 +25096,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -25260,5 +25282,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, 
%xmm0 ; AVX2-NEXT: retq @@ -25447,5 +25469,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -25632,5 +25655,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -25819,5 +25842,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -26004,5 +26028,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -26191,5 +26215,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -26376,5 +26401,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -26563,5 +26588,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -26748,5 +26774,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -26935,5 +26961,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -27120,5 +27147,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -27307,5 +27334,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -27492,5 +27520,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -27679,5 +27707,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] +; AVX2-NEXT: vpcmpgtq 
%xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -27864,5 +27893,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -28051,5 +28080,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -28236,5 +28266,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -28423,5 +28453,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -28608,5 +28639,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -28795,5 +28826,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -28980,5 +29012,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -29167,5 +29199,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -29352,5 +29385,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -29539,5 +29572,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -29724,5 +29758,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -29911,5 +29945,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -30096,5 +30131,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] ; AVX2-NEXT: vpcmpgtq %xmm0, 
%xmm1, %xmm0 ; AVX2-NEXT: retq @@ -30283,5 +30318,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -30468,5 +30504,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -30655,5 +30691,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -30840,5 +30877,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -31027,5 +31064,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -31212,5 +31250,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -31399,5 +31437,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -31584,5 +31623,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -31771,5 +31810,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -31956,5 +31996,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -32143,5 +32183,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -32328,5 +32369,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -32515,5 +32556,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] +; AVX2-NEXT: 
vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -32700,5 +32742,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -32887,5 +32929,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -33072,5 +33115,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -33259,5 +33302,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -33444,5 +33488,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -33631,5 +33675,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -33816,5 +33861,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -34003,5 +34048,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -34188,5 +34234,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -34375,5 +34421,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -34560,5 +34607,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -34747,5 +34794,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -34932,5 +34980,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] ; AVX2-NEXT: vpcmpgtq 
%xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -35119,5 +35167,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -35304,5 +35353,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -35491,5 +35540,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -35676,5 +35726,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -35863,5 +35913,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -36048,5 +36099,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -36235,5 +36286,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -36420,5 +36472,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -36607,5 +36659,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -36792,5 +36845,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -36979,5 +37032,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -37164,5 +37218,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -37351,5 +37405,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] +; 
AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -37536,5 +37591,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -37723,5 +37778,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -37908,5 +37964,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -38095,5 +38151,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -38280,5 +38337,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -38467,5 +38524,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -38652,5 +38710,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -38839,5 +38897,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -39024,5 +39083,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -39211,5 +39270,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -39396,5 +39456,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq @@ -39583,5 +39643,6 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -39768,5 +39829,5 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63] ; 
AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -845,5 +845,6 @@ ; AVX2-LABEL: trunc_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] +; AVX2-NEXT: vptest %xmm1, %xmm0 ; AVX2-NEXT: sete %al ; AVX2-NEXT: retq @@ -1058,5 +1059,6 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 -; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64424509455,64424509455] +; AVX2-NEXT: vptest %xmm1, %xmm0 ; AVX2-NEXT: sete %al ; AVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll --- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll @@ -64,14 +64,25 @@ ; SSE42-NEXT: retq ; -; AVX-LABEL: test_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: retq +; AVX1-LABEL: test_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: retq ; ; AVX512BW-LABEL: test_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll --- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll @@ -64,14 +64,25 @@ ; SSE42-NEXT: retq ; -; AVX-LABEL: test_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: retq +; AVX1-LABEL: test_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; 
AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: retq ; ; AVX512BW-LABEL: test_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -74,5 +74,5 @@ ; AVX2-LABEL: var_rotate_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [64,64] ; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2 ; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm1 @@ -721,12 +721,21 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_rotate_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] -; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_rotate_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] +; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_rotate_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [64,64] +; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_rotate_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -579,5 +579,5 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [64,64] ; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -67,5 +67,5 @@ ; AVX2-LABEL: var_shift_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 @@ -633,12 +633,21 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; 
AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v2i64: @@ -934,13 +943,24 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_modulo_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_modulo_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_modulo_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64: @@ -955,5 +975,6 @@ ; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -1602,8 +1623,14 @@ ; AVX2-NEXT: retq ; -; XOP-LABEL: splatconstant_shift_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: splatconstant_shift_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatconstant_shift_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709551609,18446744073709551609] +; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatconstant_shift_v2i64: @@ -1765,13 +1792,23 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: PR52719: -; AVX: # %bb.0: -; AVX-NEXT: vmovd %edi, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: PR52719: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovd %edi, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR52719: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovd %edi, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: PR52719: diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ 
b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -1023,5 +1023,6 @@ ; AVX2-LABEL: splatvar_modulo_shift_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 @@ -1045,5 +1046,6 @@ ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -776,15 +776,29 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_modulo_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_modulo_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq ; -; XOP-LABEL: splatvar_modulo_shift_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; AVX2-LABEL: splatvar_modulo_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -833,5 +833,6 @@ ; AVX2-LABEL: splatvar_modulo_shift_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -848,5 +849,6 @@ ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -683,15 +683,29 @@ ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_modulo_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_modulo_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq ; -; XOP-LABEL: splatvar_modulo_shift_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; AVX2-LABEL: splatvar_modulo_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -758,5 +758,6 @@ ; AVX2-LABEL: splatvar_modulo_shift_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -773,5 +774,6 @@ ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -108,14 +108,25 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_packus_v2i64_v2i32: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v2i64_v2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i32: @@ -258,15 +269,27 @@ ; SSE41-NEXT: retq 
; -; AVX-LABEL: trunc_packus_v2i64_v2i32_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovq %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i32_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovq %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v2i64_v2i32_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovq %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i32_store: @@ -1116,5 +1139,5 @@ ; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1128,5 +1151,5 @@ ; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1294,5 +1317,5 @@ ; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16_store: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1307,5 +1330,5 @@ ; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16_store: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -2802,14 +2825,25 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_packus_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: 
trunc_packus_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i8: @@ -2956,15 +2990,27 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_packus_v2i64_v2i8_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v2i64_v2i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i8_store: diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -114,14 +114,25 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i32: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; 
AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i32: @@ -260,15 +271,27 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i32_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovlpd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i32_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovlpd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i32_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovlpd %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store: @@ -1140,8 +1163,8 @@ ; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1152,8 +1175,8 @@ ; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1312,8 +1335,8 @@ ; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16_store: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1325,8 +1348,8 @@ ; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16_store: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: 
vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -2567,14 +2590,25 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [127,127] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8: @@ -2716,15 +2750,27 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i8_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [127,127] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) 
+; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store: diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -76,13 +76,25 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i32: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i32: @@ -181,14 +193,27 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i32_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovlpd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i32_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovlpd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i32_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovlpd %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store: @@ -794,7 +819,9 @@ ; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; 
AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: # xmm1 = mem[0,0] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -805,7 +832,9 @@ ; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: # xmm1 = mem[0,0] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-FAST-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -923,7 +952,9 @@ ; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: # xmm1 = mem[0,0] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -935,7 +966,9 @@ ; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: # xmm1 = mem[0,0] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-FAST-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -2099,13 +2132,25 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i8: @@ -2207,14 +2252,27 @@ ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i8_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store: diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll --- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll @@ -2681,9 +2681,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, @@ -2710,9 +2717,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; 
X64-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, @@ -2738,9 +2752,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, @@ -2766,9 +2787,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $17, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $17, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $17, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, @@ -2794,9 +2822,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $18, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $18, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $18, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, @@ -2823,9 +2858,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 +; 
X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, @@ -2858,9 +2900,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, @@ -2937,9 +2986,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = ashr <2 x i64> %t0, @@ -2966,9 +3022,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = ashr <2 x i64> %t0, @@ -2994,9 +3057,16 @@ ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq 
$16, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = ashr <2 x i64> %t0,
@@ -3022,9 +3092,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $17, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $17, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $17, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = ashr <2 x i64> %t0,
@@ -3050,9 +3127,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $18, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $18, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $18, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = ashr <2 x i64> %t0,
@@ -3100,5 +3184,6 @@
; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
@@ -3156,5 +3241,6 @@
; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
@@ -3299,9 +3385,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = shl <2 x i64> %t0,
@@ -3334,9 +3427,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = shl <2 x i64> %t0,
@@ -3411,9 +3511,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $15, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $15, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $15, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = shl <2 x i64> %t0,
@@ -3439,9 +3546,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $16, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $16, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = shl <2 x i64> %t0,
@@ -3467,9 +3581,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $17, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $17, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $17, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = shl <2 x i64> %t0,
@@ -3495,9 +3616,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $18, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $18, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $18, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = shl <2 x i64> %t0,
@@ -3524,9 +3652,16 @@
; X64-SSE2-NEXT:    retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 %t0 = and <2 x i64> %a0,
 %t1 = shl <2 x i64> %t0,
diff --git a/llvm/test/CodeGen/X86/vselect-minmax.ll b/llvm/test/CodeGen/X86/vselect-minmax.ll
--- a/llvm/test/CodeGen/X86/vselect-minmax.ll
+++ b/llvm/test/CodeGen/X86/vselect-minmax.ll
@@ -9550,5 +9550,5 @@
; AVX2-LABEL: test181:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
@@ -9618,5 +9618,5 @@
; AVX2-LABEL: test182:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
@@ -9686,5 +9686,5 @@
; AVX2-LABEL: test183:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
@@ -9754,5 +9754,5 @@
; AVX2-LABEL: test184:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
@@ -10056,5 +10056,5 @@
; AVX2-LABEL: test189:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
@@ -10124,5 +10124,5 @@
; AVX2-LABEL: test190:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
@@ -10192,5 +10192,5 @@
; AVX2-LABEL: test191:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
@@ -10260,5 +10260,5 @@
; AVX2-LABEL: test192:
; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2