diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9481,6 +9481,7 @@ unsigned ScalarSize = Ld.getValueSizeInBits(); bool IsGE256 = (VT.getSizeInBits() >= 256); + bool IsLE256 = (VT.getSizeInBits() <= 256); // When optimizing for size, generate up to 5 extra bytes for a broadcast // instruction to save 8 or more bytes of constant pool data. @@ -9504,7 +9505,9 @@ // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || - (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) || + (ScalarSize == 64 && + (IsGE256 || Subtarget.hasVLX() || + (IsLE256 && Subtarget.hasAVX2() && !Subtarget.hasAVX512()))) || (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1507,9 +1507,9 @@ ; ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,4] -; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpbroadcastq {{.*#+}} xmm0 = [4,4] +; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A] +; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; X64-AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; X64-AVX-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll --- a/llvm/test/CodeGen/X86/combine-movmsk.ll +++ b/llvm/test/CodeGen/X86/combine-movmsk.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) @@ -238,11 +238,20 @@ ; SSE42-NEXT: xorl $3, %eax ; SSE42-NEXT: retq ; -; AVX-LABEL: movmskpd_pow2_mask: -; AVX: # %bb.0: -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: retq +; AVX1-LABEL: movmskpd_pow2_mask: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovmskpd %xmm0, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: retq +; +; AVX2-LABEL: movmskpd_pow2_mask: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovmskpd %xmm0, %eax +; AVX2-NEXT: retq %1 = and <2 x i64> %a0, %2 = icmp eq <2 x i64> %1, zeroinitializer %3 = sext <2 x i1> %2 to <2 x i64> @@ -258,6 +267,22 @@ ; SSE-NEXT: movmskps %xmm0, %eax ; SSE-NEXT: xorl $15, %eax ; SSE-NEXT: retq +; +; AVX1-LABEL: movmskps_pow2_mask: +; AVX1: # %bb.0: +; AVX1-NEXT: vpslld $29, %xmm0, %xmm0 +; AVX1-NEXT: vmovmskps %xmm0, %eax +; AVX1-NEXT: xorl $15, %eax +; AVX1-NEXT: retq +; +; AVX2-LABEL: movmskps_pow2_mask: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovmskps %xmm0, %eax +; AVX2-NEXT: retq %1 = and <4 x i32> %a0, %2 = icmp eq <4 x i32> %1, zeroinitializer %3 = sext <4 x i1> %2 to <4 x i32> diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -345,7 +345,8 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpbroadcastq {{.*#+}} xmm2 = [1,1] +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm2 ; AVX-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 ; AVX-NEXT: vpsrlq $32, %xmm1, %xmm3 diff --git a/llvm/test/CodeGen/X86/concat-cast.ll b/llvm/test/CodeGen/X86/concat-cast.ll --- a/llvm/test/CodeGen/X86/concat-cast.ll +++ b/llvm/test/CodeGen/X86/concat-cast.ll @@ -373,7 +373,7 @@ ; AVX2-LABEL: mismatch_tofp_v4i32_v4f32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll --- a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll +++ b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll @@ -74,7 +74,9 @@ define <2 x double> @ExeDepsFix_broadcastsd(<2 x double> %arg, <2 x double> %arg2) { ; CHECK-LABEL: ExeDepsFix_broadcastsd: ; CHECK: ## %bb.0: -; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [2147483647,2147483647] +; CHECK-NEXT: ## xmm2 = mem[0,0] +; CHECK-NEXT: vandpd %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %bitcast = bitcast <2 x double> %arg to <2 x i64> diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll --- a/llvm/test/CodeGen/X86/extractelement-fp.ll +++ b/llvm/test/CodeGen/X86/extractelement-fp.ll @@ -317,23 +317,16 @@ ; This used to crash by creating a setcc with an i64 condition on a 32-bit target. define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { -; X64-LABEL: extvselectsetcc_crash: -; X64: # %bb.0: -; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; X64-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] -; X64-NEXT: retq -; -; X86-LABEL: extvselectsetcc_crash: -; X86: # %bb.0: -; X86-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 -; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; X86-NEXT: vandpd %xmm2, %xmm1, %xmm1 -; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X86-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] -; X86-NEXT: retl +; CHECK-LABEL: extvselectsetcc_crash: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [5.0E+0,5.0E+0] +; CHECK-NEXT: # xmm1 = mem[0,0] +; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vandpd %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] +; CHECK-NEXT: ret{{[l|q]}} %cmp = fcmp oeq <2 x double> %x, %s = select <2 x i1> %cmp, <2 x double> , <2 x double> %r = shufflevector <2 x double> %s, <2 x double> %x, <3 x i32> @@ -546,7 +539,9 @@ define double @fabs_v4f64(<4 x double> %x) nounwind { ; X64-LABEL: fabs_v4f64: ; X64: # %bb.0: -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN] +; X64-NEXT: # xmm1 = mem[0,0] +; X64-NEXT: vandps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq ; @@ -556,7 +551,9 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN] +; X86-NEXT: # xmm1 = mem[0,0] +; X86-NEXT: vandps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovlps %xmm0, (%esp) ; X86-NEXT: fldl (%esp) ; X86-NEXT: movl %ebp, %esp @@ -818,8 +815,12 @@ define double @copysign_v4f64(<4 x double> %x, <4 x double> %y) nounwind { ; X64-LABEL: copysign_v4f64: ; X64: # %bb.0: -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] +; X64-NEXT: # xmm2 = mem[0,0] +; X64-NEXT: vandps %xmm2, %xmm1, %xmm1 +; X64-NEXT: vmovddup {{.*#+}} xmm2 = [NaN,NaN] +; X64-NEXT: # xmm2 = mem[0,0] +; X64-NEXT: vandps %xmm2, %xmm0, %xmm0 ; X64-NEXT: vorps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq @@ -830,8 +831,12 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-NEXT: vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0] +; X86-NEXT: # xmm2 = mem[0,0] +; X86-NEXT: vandps %xmm2, %xmm1, %xmm1 +; X86-NEXT: vmovddup {{.*#+}} xmm2 = [NaN,NaN] +; X86-NEXT: # xmm2 = mem[0,0] +; X86-NEXT: vandps %xmm2, %xmm0, %xmm0 ; X86-NEXT: vorps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovlps %xmm0, (%esp) ; X86-NEXT: fldl (%esp) @@ -1096,7 +1101,9 @@ define double @round_v4f64(<4 x double> %x) nounwind { ; X64-LABEL: round_v4f64: ; X64: # %bb.0: -; X64-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; X64-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; X64-NEXT: # xmm1 = mem[0,0] +; X64-NEXT: vandpd %xmm1, %xmm0, %xmm1 ; X64-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] ; X64-NEXT: # xmm2 = mem[0,0] ; X64-NEXT: vorpd %xmm2, %xmm1, %xmm1 @@ -1111,7 +1118,9 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 +; X86-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; X86-NEXT: # xmm1 = mem[0,0] +; X86-NEXT: vandpd %xmm1, %xmm0, %xmm1 ; X86-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] ; X86-NEXT: # xmm2 = mem[0,0] ; X86-NEXT: vorpd %xmm2, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll @@ -122,7 +122,7 @@ ; X64-AVX2-LABEL: test_reduce_v2i64: ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll @@ -124,7 +124,7 @@ ; X64-AVX2-LABEL: test_reduce_v2i64: ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll --- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll +++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll @@ -425,7 +425,7 @@ ; AVX2-LABEL: reassociate_umax_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm1, %xmm2, %xmm4 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm5 ; AVX2-NEXT: vpcmpgtq %xmm5, %xmm4, %xmm4 @@ -723,7 +723,7 @@ ; AVX2-LABEL: reassociate_umin_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm1, %xmm2, %xmm4 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm5 ; AVX2-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll @@ -2474,10 +2474,10 @@ ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483647,2147483647] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [2147483647,2147483647] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] ; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -2613,34 +2613,63 @@ ; SSE4-NEXT: pextrw $1, %xmm0, 2(%rdi) ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i16: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] -; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne .LBB7_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB7_3 -; AVX-NEXT: .LBB7_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: .LBB7_1: # %cond.store -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB7_4 -; AVX-NEXT: .LBB7_3: # %cond.store1 -; AVX-NEXT: vpextrw $1, %xmm0, 2(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB7_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB7_3 +; AVX1-NEXT: .LBB7_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB7_1: # %cond.store +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB7_4 +; AVX1-NEXT: .LBB7_3: # %cond.store1 +; AVX1-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [32767,32767] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB7_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB7_3 +; AVX2-NEXT: .LBB7_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB7_1: # %cond.store +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB7_4 +; AVX2-NEXT: .LBB7_3: # %cond.store1 +; AVX2-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i16: ; AVX512F: # %bb.0: @@ -2783,33 +2812,61 @@ ; SSE4-NEXT: pextrb $1, %xmm2, 1(%rdi) ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [127,127] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488] -; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne .LBB8_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB8_3 -; AVX-NEXT: .LBB8_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: .LBB8_1: # %cond.store -; AVX-NEXT: vpextrb $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB8_4 -; AVX-NEXT: .LBB8_3: # %cond.store1 -; AVX-NEXT: vpextrb $1, %xmm0, 1(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB8_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB8_3 +; AVX1-NEXT: .LBB8_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB8_1: # %cond.store +; AVX1-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB8_4 +; AVX1-NEXT: .LBB8_3: # %cond.store1 +; AVX1-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [127,127] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB8_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB8_3 +; AVX2-NEXT: .LBB8_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB8_1: # %cond.store +; AVX2-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB8_4 +; AVX2-NEXT: .LBB8_3: # %cond.store1 +; AVX2-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i8: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll @@ -2154,9 +2154,11 @@ ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [4294967295,4294967295] -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103] +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [4294967295,4294967295] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103] ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -2273,33 +2275,63 @@ ; SSE4-NEXT: pextrw $1, %xmm0, 2(%rdi) ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i16: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [65535,65535] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 -; AVX-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343] -; AVX-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne .LBB7_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB7_3 -; AVX-NEXT: .LBB7_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: .LBB7_1: # %cond.store -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB7_4 -; AVX-NEXT: .LBB7_3: # %cond.store1 -; AVX-NEXT: vpextrw $1, %xmm0, 2(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [65535,65535] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB7_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB7_3 +; AVX1-NEXT: .LBB7_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB7_1: # %cond.store +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB7_4 +; AVX1-NEXT: .LBB7_3: # %cond.store1 +; AVX1-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [65535,65535] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343] +; AVX2-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB7_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB7_3 +; AVX2-NEXT: .LBB7_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB7_1: # %cond.store +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB7_4 +; AVX2-NEXT: .LBB7_3: # %cond.store1 +; AVX2-NEXT: vpextrw $1, %xmm0, 2(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i16: ; AVX512F: # %bb.0: @@ -2423,32 +2455,61 @@ ; SSE4-NEXT: pextrb $1, %xmm3, 1(%rdi) ; SSE4-NEXT: retq ; -; AVX-LABEL: truncstore_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [255,255] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 -; AVX-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063] -; AVX-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 -; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vmovmskpd %xmm1, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: testb $1, %al -; AVX-NEXT: jne .LBB8_1 -; AVX-NEXT: # %bb.2: # %else -; AVX-NEXT: testb $2, %al -; AVX-NEXT: jne .LBB8_3 -; AVX-NEXT: .LBB8_4: # %else2 -; AVX-NEXT: retq -; AVX-NEXT: .LBB8_1: # %cond.store -; AVX-NEXT: vpextrb $0, %xmm0, (%rdi) -; AVX-NEXT: testb $2, %al -; AVX-NEXT: je .LBB8_4 -; AVX-NEXT: .LBB8_3: # %cond.store1 -; AVX-NEXT: vpextrb $1, %xmm0, 1(%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: truncstore_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [255,255] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovmskpd %xmm1, %eax +; AVX1-NEXT: xorl $3, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: jne .LBB8_1 +; AVX1-NEXT: # %bb.2: # %else +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: jne .LBB8_3 +; AVX1-NEXT: .LBB8_4: # %else2 +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB8_1: # %cond.store +; AVX1-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX1-NEXT: testb $2, %al +; AVX1-NEXT: je .LBB8_4 +; AVX1-NEXT: .LBB8_3: # %cond.store1 +; AVX1-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: truncstore_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [255,255] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vmovmskpd %xmm1, %eax +; AVX2-NEXT: xorl $3, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: jne .LBB8_1 +; AVX2-NEXT: # %bb.2: # %else +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: jne .LBB8_3 +; AVX2-NEXT: .LBB8_4: # %else2 +; AVX2-NEXT: retq +; AVX2-NEXT: .LBB8_1: # %cond.store +; AVX2-NEXT: vpextrb $0, %xmm0, (%rdi) +; AVX2-NEXT: testb $2, %al +; AVX2-NEXT: je .LBB8_4 +; AVX2-NEXT: .LBB8_3: # %cond.store1 +; AVX2-NEXT: vpextrb $1, %xmm0, 1(%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: truncstore_v2i64_v2i8: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll --- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll @@ -975,7 +975,8 @@ ; AVX2-FALLBACK-LABEL: vec128_i64_signed_reg_reg: ; AVX2-FALLBACK: # %bb.0: ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 @@ -992,25 +993,66 @@ ; AVX2-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_reg_reg: -; XOP: # %bb.0: -; XOP-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_reg_reg: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_reg_reg: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_reg_reg: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_reg_reg: ; AVX512F: # %bb.0: @@ -1194,11 +1236,12 @@ ; ; AVX2-FALLBACK-LABEL: vec128_i64_unsigned_reg_reg: ; AVX2-FALLBACK: # %bb.0: -; AVX2-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-FALLBACK-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-FALLBACK-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm5 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm5 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm5, %xmm4, %xmm5 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm1 @@ -1215,25 +1258,66 @@ ; AVX2-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_unsigned_reg_reg: -; XOP: # %bb.0: -; XOP-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_unsigned_reg_reg: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_unsigned_reg_reg: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_unsigned_reg_reg: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltuq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_unsigned_reg_reg: ; AVX512F: # %bb.0: @@ -1421,7 +1505,8 @@ ; AVX2-FALLBACK: # %bb.0: ; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 @@ -1438,26 +1523,69 @@ ; AVX2-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_mem_reg: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa (%rdi), %xmm1 -; XOP-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 -; XOP-NEXT: vpsubq %xmm4, %xmm0, %xmm0 -; XOP-NEXT: vpsrlq $1, %xmm0, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0 -; XOP-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0 -; XOP-NEXT: vpsllq $32, %xmm0, %xmm0 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_mem_reg: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 +; XOP-FALLBACK-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_mem_reg: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX1-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOPAVX1-NEXT: vpsllq $32, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_mem_reg: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX2-NEXT: vpcomgtq %xmm0, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpsrlq $1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm4, %xmm0 +; XOPAVX2-NEXT: vpsllq $32, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_mem_reg: ; AVX512F: # %bb.0: @@ -1644,7 +1772,8 @@ ; AVX2-FALLBACK: # %bb.0: ; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 @@ -1661,26 +1790,69 @@ ; AVX2-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_reg_mem: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa (%rdi), %xmm1 -; XOP-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_reg_mem: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 +; XOP-FALLBACK-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_reg_mem: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX1-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_reg_mem: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vmovdqa (%rdi), %xmm1 +; XOPAVX2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_reg_mem: ; AVX512F: # %bb.0: @@ -1871,7 +2043,8 @@ ; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm0 ; AVX2-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX2-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; AVX2-FALLBACK-NEXT: vpor %xmm3, %xmm2, %xmm3 ; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 ; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 @@ -1888,27 +2061,72 @@ ; AVX2-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX2-FALLBACK-NEXT: retq ; -; XOP-LABEL: vec128_i64_signed_mem_mem: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa (%rdi), %xmm0 -; XOP-NEXT: vmovdqa (%rsi), %xmm1 -; XOP-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 -; XOP-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 -; XOP-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 -; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; XOP-NEXT: vpsubq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm2 -; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq $32, %xmm3, %xmm4 -; XOP-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 -; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1 -; XOP-NEXT: vpsllq $32, %xmm1, %xmm1 -; XOP-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; XOP-NEXT: retq +; XOP-FALLBACK-LABEL: vec128_i64_signed_mem_mem: +; XOP-FALLBACK: # %bb.0: +; XOP-FALLBACK-NEXT: vmovdqa (%rdi), %xmm0 +; XOP-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1 +; XOP-FALLBACK-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOP-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOP-FALLBACK-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOP-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOP-FALLBACK-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOP-FALLBACK-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOP-FALLBACK-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOP-FALLBACK-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOP-FALLBACK-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOP-FALLBACK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOP-FALLBACK-NEXT: retq +; +; XOPAVX1-LABEL: vec128_i64_signed_mem_mem: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa (%rdi), %xmm0 +; XOPAVX1-NEXT: vmovdqa (%rsi), %xmm1 +; XOPAVX1-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; XOPAVX1-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX1-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: vec128_i64_signed_mem_mem: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vmovdqa (%rdi), %xmm0 +; XOPAVX2-NEXT: vmovdqa (%rsi), %xmm1 +; XOPAVX2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm2 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] +; XOPAVX2-NEXT: vpor %xmm3, %xmm2, %xmm3 +; XOPAVX2-NEXT: vpcomltq %xmm1, %xmm0, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4 +; XOPAVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsubq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm2 +; XOPAVX2-NEXT: vpsrlq $33, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq $32, %xmm3, %xmm4 +; XOPAVX2-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpaddq %xmm1, %xmm4, %xmm1 +; XOPAVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512F-LABEL: vec128_i64_signed_mem_mem: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -120,15 +120,25 @@ ; SSE-NEXT: paddq %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: mul_v2i64c: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: mul_v2i64c: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [117,117] +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: mul_v2i64c: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: retq entry: %A = mul <2 x i64> %i, < i64 117, i64 117 > ret <2 x i64> %A diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -1207,8 +1207,11 @@ ; AVX2-LABEL: v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vblendvpd %xmm2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: # xmm4 = mem[0,0] +; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm4, %xmm3 ; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll --- a/llvm/test/CodeGen/X86/sat-add.ll +++ b/llvm/test/CodeGen/X86/sat-add.ll @@ -657,12 +657,15 @@ ; ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovapd {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573] -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765] +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765] ; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min: @@ -726,11 +729,12 @@ ; ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -793,11 +797,12 @@ ; ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -1206,8 +1211,10 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: vpxor %xmm4, %xmm1, %xmm4 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 ; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 @@ -1275,7 +1282,7 @@ ; ; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1 @@ -1344,8 +1351,10 @@ ; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: vpxor %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -1304,8 +1304,11 @@ ; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775807,9223372036854775807] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: # xmm3 = mem[0,0] +; AVX2-NEXT: vblendvpd %xmm1, %xmm2, %xmm3, %xmm2 ; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 ; AVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll @@ -906,7 +906,7 @@ ; ; AVX2-LABEL: v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -240,15 +240,17 @@ ; ; CHECK-AVX2-LABEL: t3_wide: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411] -; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 -; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2863311530,2863311530] +; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 +; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [12297829382473034411,12297829382473034411] +; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; CHECK-AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll --- a/llvm/test/CodeGen/X86/usub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll @@ -817,7 +817,7 @@ ; ; AVX2-LABEL: v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll --- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll +++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll @@ -342,7 +342,7 @@ ; ; AVX2-LABEL: ge_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 @@ -516,7 +516,7 @@ ; ; AVX2-LABEL: gt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -764,7 +764,7 @@ ; ; AVX2-LABEL: le_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -939,7 +939,7 @@ ; ; AVX2-LABEL: lt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -344,15 +344,27 @@ ; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; -; VEX-LABEL: fptoui_2f64_to_4i32: -; VEX: # %bb.0: -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1 -; VEX-NEXT: vpsrad $31, %xmm1, %xmm2 -; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: fptoui_2f64_to_4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX1-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: fptoui_2f64_to_4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [-2.147483648E+9,-2.147483648E+9] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vaddpd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm1, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2 +; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_4i32: ; AVX512F: # %bb.0: @@ -396,15 +408,27 @@ ; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; -; VEX-LABEL: fptoui_2f64_to_2i32: -; VEX: # %bb.0: -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1 -; VEX-NEXT: vpsrad $31, %xmm1, %xmm2 -; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: fptoui_2f64_to_2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm1 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX1-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: fptoui_2f64_to_2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [-2.147483648E+9,-2.147483648E+9] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vaddpd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm1, %xmm1 +; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2 +; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_2i32: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -52,14 +52,23 @@ ; SSE41-NEXT: cvtpd2ps %xmm0, %xmm0 ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i32_to_2f32: -; VEX: # %bb.0: -; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] -; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vcvtpd2ps %xmm0, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i32_to_2f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i32_to_2f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i32_to_2f32: ; AVX512F: # %bb.0: @@ -604,10 +613,14 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -667,13 +680,21 @@ ; SSE41-NEXT: subpd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i32_to_2f64: -; VEX: # %bb.0: -; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] -; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i32_to_2f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i32_to_2f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i32_to_2f64: ; AVX512F: # %bb.0: @@ -1908,23 +1929,42 @@ ; SSE41-NEXT: movaps %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i64_to_4f32: -; VEX: # %bb.0: -; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 -; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 -; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 -; VEX-NEXT: vpextrq $1, %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 -; VEX-NEXT: vmovq %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 -; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero -; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 -; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 -; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] -; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i64_to_4f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i64_to_4f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX2-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i64_to_4f32: ; AVX512F: # %bb.0: @@ -2023,24 +2063,44 @@ ; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm3[0],zero ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_2i64_to_2f32: -; VEX: # %bb.0: -; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 -; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 -; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 -; VEX-NEXT: vpextrq $1, %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 -; VEX-NEXT: vmovq %xmm1, %rax -; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 -; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero -; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 -; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 -; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] -; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 -; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_2i64_to_2f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX1-NEXT: vmovq %xmm1, %rax +; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_2i64_to_2f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpsrlq $1, %xmm0, %xmm2 +; AVX2-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX2-NEXT: vmovq %xmm1, %rax +; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 +; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero +; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i64_to_2f32: ; AVX512F: # %bb.0: @@ -3276,10 +3336,14 @@ ; AVX2-NEXT: vmovdqa (%rdi), %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: # xmm2 = mem[0,0] +; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -3343,13 +3407,21 @@ ; SSE41-NEXT: subpd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; VEX-LABEL: uitofp_load_2i32_to_2f64: -; VEX: # %bb.0: -; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] -; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 -; VEX-NEXT: retq +; AVX1-LABEL: uitofp_load_2i32_to_2f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_load_2i32_to_2f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: uitofp_load_2i32_to_2f64: ; AVX512F: # %bb.0: @@ -5658,15 +5730,17 @@ ; ; AVX2-LABEL: PR43609: ; AVX2: # %bb.0: -; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] ; AVX2-NEXT: vpor %xmm4, %xmm3, %xmm3 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] ; AVX2-NEXT: vpor %xmm5, %xmm0, %xmm0 -; AVX2-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX2-NEXT: # xmm6 = mem[0,0] ; AVX2-NEXT: vsubpd %xmm6, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] @@ -5675,7 +5749,8 @@ ; AVX2-NEXT: vpor %xmm5, %xmm1, %xmm1 ; AVX2-NEXT: vsubpd %xmm6, %xmm1, %xmm1 ; AVX2-NEXT: vaddpd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX2-NEXT: # xmm2 = mem[0,0] ; AVX2-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vmovupd %xmm0, (%rdi) diff --git a/llvm/test/CodeGen/X86/vec_minmax_uint.ll b/llvm/test/CodeGen/X86/vec_minmax_uint.ll --- a/llvm/test/CodeGen/X86/vec_minmax_uint.ll +++ b/llvm/test/CodeGen/X86/vec_minmax_uint.ll @@ -71,7 +71,7 @@ ; ; AVX2-LABEL: max_gt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -477,7 +477,7 @@ ; ; AVX2-LABEL: max_ge_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -882,7 +882,7 @@ ; ; AVX2-LABEL: min_lt_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -1290,7 +1290,7 @@ ; ; AVX2-LABEL: min_le_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll --- a/llvm/test/CodeGen/X86/vec_uaddo.ll +++ b/llvm/test/CodeGen/X86/vec_uaddo.ll @@ -855,16 +855,27 @@ ; SSE-NEXT: movdqa %xmm1, (%rdi) ; SSE-NEXT: retq ; -; AVX-LABEL: uaddo_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovdqa %xmm1, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: uaddo_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovdqa %xmm1, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: uaddo_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: retq ; ; AVX512-LABEL: uaddo_v2i64: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll --- a/llvm/test/CodeGen/X86/vec_usubo.ll +++ b/llvm/test/CodeGen/X86/vec_usubo.ll @@ -902,16 +902,27 @@ ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: usubo_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovdqa %xmm1, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: usubo_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovdqa %xmm1, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: usubo_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: retq ; ; AVX512-LABEL: usubo_v2i64: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -470,10 +470,21 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v16i8: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v16i8: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v16i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v16i8: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ret <16 x i8> %b } @@ -549,11 +560,24 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v8i16: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v8i16: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v8i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v8i16: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ret <8 x i16> %b } @@ -634,11 +658,24 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v4i32: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v4i32: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v4i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v4i32: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ret <4 x i32> %b } @@ -721,11 +758,24 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v2i64: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v2i64: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v2i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v2i64: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) ret <2 x i64> %b } diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -1034,16 +1034,27 @@ ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; -; AVX-LABEL: bool_reduction_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; AVX1-LABEL: bool_reduction_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovmskpd %xmm0, %eax +; AVX1-NEXT: cmpb $3, %al +; AVX1-NEXT: sete %al +; AVX1-NEXT: retq +; +; AVX2-LABEL: bool_reduction_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovmskpd %xmm0, %eax +; AVX2-NEXT: cmpb $3, %al +; AVX2-NEXT: sete %al +; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2i64: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -989,16 +989,27 @@ ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; -; AVX-LABEL: bool_reduction_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: setne %al -; AVX-NEXT: retq +; AVX1-LABEL: bool_reduction_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovmskpd %xmm0, %eax +; AVX1-NEXT: testl %eax, %eax +; AVX1-NEXT: setne %al +; AVX1-NEXT: retq +; +; AVX2-LABEL: bool_reduction_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovmskpd %xmm0, %eax +; AVX2-NEXT: testl %eax, %eax +; AVX2-NEXT: setne %al +; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2i64: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -84,7 +84,7 @@ ; ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 @@ -167,7 +167,7 @@ ; ; XOPAVX2-LABEL: var_funnnel_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 @@ -952,16 +952,27 @@ ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: @@ -1023,16 +1034,27 @@ ; AVX512VLVBMI2-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; -; XOP-LABEL: splatvar_funnnel_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOP-NEXT: vpsrlq $1, %xmm1, %xmm1 -; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: splatvar_funnnel_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_funnnel_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i64: ; X86-SSE2: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -761,7 +761,7 @@ ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm1 ; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 @@ -849,7 +849,7 @@ ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq $1, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -87,7 +87,7 @@ ; ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsllvq %xmm3, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -731,17 +731,29 @@ ; SSE-NEXT: por %xmm4, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vpsllq %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -587,7 +587,7 @@ ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -84,7 +84,7 @@ ; ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -168,7 +168,7 @@ ; ; XOPAVX2-LABEL: var_funnnel_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1041,16 +1041,27 @@ ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 +; AVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: @@ -1113,16 +1124,27 @@ ; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; -; XOP-LABEL: splatvar_funnnel_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 -; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 -; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllq $1, %xmm0, %xmm0 -; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: splatvar_funnnel_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_funnnel_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX2-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpsllq %xmm2, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i64: ; X86-SSE2: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -792,7 +792,7 @@ ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -881,7 +881,7 @@ ; ; XOPAVX2-LABEL: splatvar_funnnel_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -87,7 +87,7 @@ ; ; AVX2-LABEL: var_funnnel_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsrlvq %xmm3, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -753,17 +753,29 @@ ; SSE-NEXT: por %xmm4, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_funnnel_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1 -; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_funnnel_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_funnnel_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vpsrlq %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -613,7 +613,7 @@ ; ; AVX2-LABEL: splatvar_funnnel_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll --- a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -1490,14 +1490,15 @@ ; ; X64-AVX2-LABEL: mul_v2i64_neg_15_63: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553] -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 +; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551601,18446744073709551553] +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; X64-AVX2-NEXT: retq ; ; X64-AVX512DQ-LABEL: mul_v2i64_neg_15_63: @@ -1551,14 +1552,15 @@ ; ; X64-AVX2-LABEL: mul_v2i64_neg_17_65: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551] -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 -; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 +; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709551599,18446744073709551551] +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; X64-AVX2-NEXT: retq ; ; X64-AVX512DQ-LABEL: mul_v2i64_neg_17_65: diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll --- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll @@ -17262,7 +17262,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64: @@ -17447,7 +17448,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -17634,7 +17635,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64: @@ -17819,7 +17821,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -18006,7 +18008,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_4_v2i64: @@ -18191,7 +18194,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -18378,7 +18381,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64: @@ -18563,7 +18567,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -18750,7 +18754,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64: @@ -18935,7 +18940,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -19122,7 +19127,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64: @@ -19307,7 +19313,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -19494,7 +19500,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64: @@ -19679,7 +19686,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -19866,7 +19873,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64: @@ -20051,7 +20059,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -20238,7 +20246,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64: @@ -20423,7 +20432,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -20610,7 +20619,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64: @@ -20795,7 +20805,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -20982,7 +20992,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64: @@ -21167,7 +21178,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -21354,7 +21365,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64: @@ -21539,7 +21551,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -21726,7 +21738,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64: @@ -21911,7 +21924,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -22098,7 +22111,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64: @@ -22283,7 +22297,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -22470,7 +22484,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64: @@ -22655,7 +22670,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -22842,7 +22857,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64: @@ -23027,7 +23043,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -23214,7 +23230,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64: @@ -23399,7 +23416,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -23586,7 +23603,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64: @@ -23771,7 +23789,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -23958,7 +23976,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64: @@ -24143,7 +24162,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -24330,7 +24349,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64: @@ -24515,7 +24535,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -24702,7 +24722,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64: @@ -24887,7 +24908,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -25074,7 +25095,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64: @@ -25259,7 +25281,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -25446,7 +25468,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64: @@ -25631,7 +25654,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -25818,7 +25841,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64: @@ -26003,7 +26027,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -26190,7 +26214,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64: @@ -26375,7 +26400,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -26562,7 +26587,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64: @@ -26747,7 +26773,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -26934,7 +26960,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64: @@ -27119,7 +27146,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -27306,7 +27333,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64: @@ -27491,7 +27519,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -27678,7 +27706,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64: @@ -27863,7 +27892,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -28050,7 +28079,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64: @@ -28235,7 +28265,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -28422,7 +28452,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64: @@ -28607,7 +28638,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -28794,7 +28825,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64: @@ -28979,7 +29011,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -29166,7 +29198,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64: @@ -29351,7 +29384,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -29538,7 +29571,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64: @@ -29723,7 +29757,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -29910,7 +29944,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64: @@ -30095,7 +30130,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -30282,7 +30317,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64: @@ -30467,7 +30503,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -30654,7 +30690,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64: @@ -30839,7 +30876,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -31026,7 +31063,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64: @@ -31211,7 +31249,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -31398,7 +31436,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64: @@ -31583,7 +31622,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -31770,7 +31809,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64: @@ -31955,7 +31995,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -32142,7 +32182,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64: @@ -32327,7 +32368,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -32514,7 +32555,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64: @@ -32699,7 +32741,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -32886,7 +32928,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64: @@ -33071,7 +33114,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -33258,7 +33301,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64: @@ -33443,7 +33487,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -33630,7 +33674,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64: @@ -33815,7 +33860,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -34002,7 +34047,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64: @@ -34187,7 +34233,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -34374,7 +34420,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64: @@ -34559,7 +34606,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -34746,7 +34793,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64: @@ -34931,7 +34979,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -35118,7 +35166,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64: @@ -35303,7 +35352,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -35490,7 +35539,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64: @@ -35675,7 +35725,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -35862,7 +35912,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64: @@ -36047,7 +36098,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -36234,7 +36285,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64: @@ -36419,7 +36471,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -36606,7 +36658,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64: @@ -36791,7 +36844,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -36978,7 +37031,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64: @@ -37163,7 +37217,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -37350,7 +37404,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64: @@ -37535,7 +37590,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -37722,7 +37777,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64: @@ -37907,7 +37963,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -38094,7 +38150,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64: @@ -38279,7 +38336,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -38466,7 +38523,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64: @@ -38651,7 +38709,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -38838,7 +38896,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64: @@ -39023,7 +39082,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -39210,7 +39269,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64: @@ -39395,7 +39455,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; @@ -39582,7 +39642,8 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64: @@ -39767,7 +39828,7 @@ ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -844,7 +844,8 @@ ; ; AVX2-LABEL: trunc_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] +; AVX2-NEXT: vptest %xmm1, %xmm0 ; AVX2-NEXT: sete %al ; AVX2-NEXT: retq ; @@ -1057,7 +1058,8 @@ ; AVX2-LABEL: PR44781: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 -; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64424509455,64424509455] +; AVX2-NEXT: vptest %xmm1, %xmm0 ; AVX2-NEXT: sete %al ; AVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll --- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll @@ -63,16 +63,27 @@ ; SSE42-NEXT: movq %xmm2, %rax ; SSE42-NEXT: retq ; -; AVX-LABEL: test_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: retq +; AVX1-LABEL: test_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: retq ; ; AVX512BW-LABEL: test_v2i64: ; AVX512BW: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll --- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll @@ -63,16 +63,27 @@ ; SSE42-NEXT: movq %xmm2, %rax ; SSE42-NEXT: retq ; -; AVX-LABEL: test_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2 -; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: retq +; AVX1-LABEL: test_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 +; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: retq ; ; AVX512BW-LABEL: test_v2i64: ; AVX512BW: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -73,7 +73,7 @@ ; ; AVX2-LABEL: var_rotate_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [64,64] ; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2 ; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0 @@ -720,14 +720,23 @@ ; SSE-NEXT: por %xmm3, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_rotate_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] -; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_rotate_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] +; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_rotate_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [64,64] +; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_rotate_v2i64: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -578,7 +578,7 @@ ; AVX2-LABEL: splatvar_rotate_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [64,64] ; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -66,7 +66,7 @@ ; ; AVX2-LABEL: var_shift_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 @@ -632,14 +632,23 @@ ; SSE-NEXT: psubq %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_shift_v2i64: ; XOPAVX1: # %bb.0: @@ -933,15 +942,26 @@ ; SSE-NEXT: psubq %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_modulo_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_modulo_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splatvar_modulo_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64: ; XOPAVX1: # %bb.0: @@ -954,7 +974,8 @@ ; ; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1 @@ -1601,10 +1622,16 @@ ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; AVX2-NEXT: retq ; -; XOP-LABEL: splatconstant_shift_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: splatconstant_shift_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatconstant_shift_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709551609,18446744073709551609] +; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatconstant_shift_v2i64: ; AVX512: # %bb.0: @@ -1764,15 +1791,25 @@ ; SSE-NEXT: psubq %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: PR52719: -; AVX: # %bb.0: -; AVX-NEXT: vmovd %edi, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: PR52719: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovd %edi, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR52719: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovd %edi, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: PR52719: ; XOPAVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -1022,7 +1022,8 @@ ; ; AVX2-LABEL: splatvar_modulo_shift_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 ; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 @@ -1044,7 +1045,8 @@ ; ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 ; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -775,17 +775,31 @@ ; SSE-NEXT: psrlq %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_modulo_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_modulo_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq ; -; XOP-LABEL: splatvar_modulo_shift_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; AVX2-LABEL: splatvar_modulo_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v2i64: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -832,7 +832,8 @@ ; ; AVX2-LABEL: splatvar_modulo_shift_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -847,7 +848,8 @@ ; ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -682,17 +682,31 @@ ; SSE-NEXT: psllq %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatvar_modulo_shift_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatvar_modulo_shift_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq ; -; XOP-LABEL: splatvar_modulo_shift_v2i64: -; XOP: # %bb.0: -; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; XOP-NEXT: retq +; AVX2-LABEL: splatvar_modulo_shift_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; XOPAVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v2i64: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -757,7 +757,8 @@ ; ; AVX2-LABEL: splatvar_modulo_shift_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -772,7 +773,8 @@ ; ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -107,16 +107,27 @@ ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_packus_v2i64_v2i32: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v2i64_v2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i32: ; AVX512F: # %bb.0: @@ -257,17 +268,29 @@ ; SSE41-NEXT: movq %xmm0, (%rdi) ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_packus_v2i64_v2i32_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovq %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i32_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovq %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v2i64_v2i32_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovq %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i32_store: ; AVX512F: # %bb.0: @@ -1115,7 +1138,7 @@ ; ; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -1127,7 +1150,7 @@ ; ; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -1293,7 +1316,7 @@ ; ; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16_store: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -1306,7 +1329,7 @@ ; ; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16_store: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -2801,16 +2824,27 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_packus_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i8: ; AVX512F: # %bb.0: @@ -2955,17 +2989,29 @@ ; SSE41-NEXT: pextrw $0, %xmm1, (%rdi) ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_packus_v2i64_v2i8_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_packus_v2i64_v2i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v2i64_v2i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_packus_v2i64_v2i8_store: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -113,16 +113,27 @@ ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i32: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i32: ; AVX512F: # %bb.0: @@ -259,17 +270,29 @@ ; SSE41-NEXT: movq %xmm0, (%rdi) ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i32_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovlpd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i32_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovlpd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i32_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovlpd %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store: ; AVX512F: # %bb.0: @@ -1139,10 +1162,10 @@ ; ; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1151,10 +1174,10 @@ ; ; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] @@ -1311,10 +1334,10 @@ ; ; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16_store: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1324,10 +1347,10 @@ ; ; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16_store: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32767,32767] ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] ; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u] @@ -2566,16 +2589,27 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [127,127] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8: ; AVX512F: # %bb.0: @@ -2715,17 +2749,29 @@ ; SSE41-NEXT: pextrw $0, %xmm1, (%rdi) ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_ssat_v2i64_v2i8_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_ssat_v2i64_v2i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v2i64_v2i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [127,127] +; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -75,15 +75,27 @@ ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i32: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i32: ; AVX512F: # %bb.0: @@ -180,16 +192,29 @@ ; SSE41-NEXT: movq %xmm0, (%rdi) ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i32_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vmovlpd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i32_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vmovlpd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i32_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [4294967295,4294967295] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vmovlpd %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store: ; AVX512F: # %bb.0: @@ -793,9 +818,11 @@ ; ; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: # xmm1 = mem[0,0] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -804,9 +831,11 @@ ; ; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: # xmm1 = mem[0,0] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-FAST-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] @@ -922,9 +951,11 @@ ; ; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-SLOW-NEXT: # xmm1 = mem[0,0] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -934,9 +965,11 @@ ; ; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535] -; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm1 = [65535,65535] +; AVX2-FAST-NEXT: # xmm1 = mem[0,0] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-FAST-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] ; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u] @@ -2098,15 +2131,27 @@ ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i8: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i8: ; AVX512F: # %bb.0: @@ -2206,16 +2251,29 @@ ; SSE41-NEXT: pextrw $0, %xmm2, (%rdi) ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_usat_v2i64_v2i8_store: -; AVX: # %bb.0: -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] -; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] -; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: trunc_usat_v2i64_v2i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [255,255] +; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v2i64_v2i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [255,255] +; AVX2-NEXT: # xmm1 = mem[0,0] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: retq ; ; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll --- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll @@ -2680,11 +2680,18 @@ ; X64-SSE2-NEXT: psrlq $1, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2709,11 +2716,18 @@ ; X64-SSE2-NEXT: psrlq $15, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2737,11 +2751,18 @@ ; X64-SSE2-NEXT: psrlq $16, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2765,11 +2786,18 @@ ; X64-SSE2-NEXT: psrlq $17, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $17, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $17, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $17, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2793,11 +2821,18 @@ ; X64-SSE2-NEXT: psrlq $18, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $18, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $18, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $18, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2822,11 +2857,18 @@ ; X64-SSE2-NEXT: psrlq $1, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2857,11 +2899,18 @@ ; X64-SSE2-NEXT: psrlq $32, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = lshr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2936,11 +2985,18 @@ ; X64-SSE2-NEXT: psrlq $1, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = ashr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2965,11 +3021,18 @@ ; X64-SSE2-NEXT: psrlq $15, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $15, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = ashr <2 x i64> %t0, ret <2 x i64> %t1 @@ -2993,11 +3056,18 @@ ; X64-SSE2-NEXT: psrlq $16, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = ashr <2 x i64> %t0, ret <2 x i64> %t1 @@ -3021,11 +3091,18 @@ ; X64-SSE2-NEXT: psrlq $17, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $17, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $17, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $17, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = ashr <2 x i64> %t0, ret <2 x i64> %t1 @@ -3049,11 +3126,18 @@ ; X64-SSE2-NEXT: psrlq $18, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlq $18, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlq $18, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlq $18, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = ashr <2 x i64> %t0, ret <2 x i64> %t1 @@ -3099,7 +3183,8 @@ ; ; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpsrad $1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] @@ -3155,7 +3240,8 @@ ; ; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; X64-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] @@ -3298,11 +3384,18 @@ ; X64-SSE2-NEXT: paddq %xmm0, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = shl <2 x i64> %t0, ret <2 x i64> %t1 @@ -3333,11 +3426,18 @@ ; X64-SSE2-NEXT: psllq $32, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_32: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_32: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = shl <2 x i64> %t0, ret <2 x i64> %t1 @@ -3410,11 +3510,18 @@ ; X64-SSE2-NEXT: psllq $15, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsllq $15, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsllq $15, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsllq $15, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = shl <2 x i64> %t0, ret <2 x i64> %t1 @@ -3438,11 +3545,18 @@ ; X64-SSE2-NEXT: psllq $16, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsllq $16, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsllq $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsllq $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = shl <2 x i64> %t0, ret <2 x i64> %t1 @@ -3466,11 +3580,18 @@ ; X64-SSE2-NEXT: psllq $17, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsllq $17, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsllq $17, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsllq $17, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = shl <2 x i64> %t0, ret <2 x i64> %t1 @@ -3494,11 +3615,18 @@ ; X64-SSE2-NEXT: psllq $18, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpsllq $18, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsllq $18, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsllq $18, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = shl <2 x i64> %t0, ret <2 x i64> %t1 @@ -3523,11 +3651,18 @@ ; X64-SSE2-NEXT: paddq %xmm0, %xmm0 ; X64-SSE2-NEXT: retq ; -; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024] +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq %t0 = and <2 x i64> %a0, %t1 = shl <2 x i64> %t0, ret <2 x i64> %t1 diff --git a/llvm/test/CodeGen/X86/vselect-minmax.ll b/llvm/test/CodeGen/X86/vselect-minmax.ll --- a/llvm/test/CodeGen/X86/vselect-minmax.ll +++ b/llvm/test/CodeGen/X86/vselect-minmax.ll @@ -9549,7 +9549,7 @@ ; ; AVX2-LABEL: test181: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -9617,7 +9617,7 @@ ; ; AVX2-LABEL: test182: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -9685,7 +9685,7 @@ ; ; AVX2-LABEL: test183: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -9753,7 +9753,7 @@ ; ; AVX2-LABEL: test184: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -10055,7 +10055,7 @@ ; ; AVX2-LABEL: test189: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -10123,7 +10123,7 @@ ; ; AVX2-LABEL: test190: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -10191,7 +10191,7 @@ ; ; AVX2-LABEL: test191: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 @@ -10259,7 +10259,7 @@ ; ; AVX2-LABEL: test192: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2