diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9503,7 +9503,8 @@ // For size optimization, also splat v2f64 and v2i64, and for size opt // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. - if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || + if (ScalarSize == 32 || + (ScalarSize == 64 && (IsGE256 || Subtarget.hasAVX512())) || (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1516,9 +1516,9 @@ ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const: ; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4] -; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4] +; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] ; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll --- a/llvm/test/CodeGen/X86/avx512-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-arith.ll @@ -293,7 +293,7 @@ define <2 x i64> @imulq128_bcast(<2 x i64> %x) { ; AVX512F-LABEL: imulq128_bcast: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8086,8086] ; AVX512F-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 ; AVX512F-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 @@ -303,7 +303,7 @@ ; ; AVX512VL-LABEL: imulq128_bcast: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8086,8086] ; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 ; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 @@ -313,7 +313,7 @@ ; ; AVX512BW-LABEL: imulq128_bcast: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8086,8086] ; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 @@ -324,15 +324,14 @@ ; AVX512DQ-LABEL: imulq128_bcast: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] -; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; SKX-LABEL: imulq128_bcast: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; SKX-NEXT: retq %z = mul <2 x i64> %x, ret <2 x i64>%z diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -177,7 +177,7 @@ ; CHECK-LABEL: bcast_unfold_add_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB5_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -584,7 +584,8 @@ ; CHECK-LABEL: bcast_unfold_or_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3,3] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3,3] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB17_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -786,7 +787,8 @@ ; CHECK-LABEL: bcast_unfold_fneg_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB23_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1003,7 +1005,8 @@ ; CHECK-LABEL: bcast_unfold_fabs_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [NaN,NaN] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [NaN,NaN] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB29_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1208,7 +1211,8 @@ ; CHECK-LABEL: bcast_unfold_fadd_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB35_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1410,7 +1414,8 @@ ; CHECK-LABEL: bcast_unfold_fmul_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [3.0E+0,3.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3.0E+0,3.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB41_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1617,7 +1622,8 @@ ; CHECK-LABEL: bcast_unfold_fdiv_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB47_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1865,7 +1871,8 @@ ; CHECK-LABEL: bcast_unfold_fma213_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB54_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1900,7 +1907,8 @@ ; CHECK-LABEL: bcast_unfold_fma231_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB55_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2186,7 +2194,8 @@ ; CHECK-LABEL: bcast_unfold_fmax_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB63_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2400,7 +2409,8 @@ ; CHECK-LABEL: bcast_unfold_fmin_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB69_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2611,7 +2621,7 @@ ; CHECK-LABEL: bcast_unfold_smin_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB75_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2819,7 +2829,7 @@ ; CHECK-LABEL: bcast_unfold_smax_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB81_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3027,7 +3037,7 @@ ; CHECK-LABEL: bcast_unfold_umin_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB87_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3235,7 +3245,7 @@ ; CHECK-LABEL: bcast_unfold_umax_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB93_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3449,13 +3459,13 @@ ; CHECK-LABEL: bcast_unfold_pcmpgt_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB99_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %xmm1 ; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} ; CHECK-NEXT: vmovdqu %xmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB99_1 @@ -3668,13 +3678,13 @@ ; CHECK-LABEL: bcast_unfold_pcmpeq_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB105_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %xmm1 ; CHECK-NEXT: vpcmpeqq %xmm0, %xmm1, %k1 -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} ; CHECK-NEXT: vmovdqu %xmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB105_1 @@ -3890,13 +3900,13 @@ ; CHECK-LABEL: bcast_unfold_pcmp_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB111_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovdqu (%rdi,%rax,8), %xmm1 ; CHECK-NEXT: vpcmpltq %xmm0, %xmm1, %k1 -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} ; CHECK-NEXT: vmovdqu %xmm1, (%rdi,%rax,8) ; CHECK-NEXT: addq $2, %rax ; CHECK-NEXT: cmpq $1023, %rax # imm = 0x3FF @@ -4115,13 +4125,13 @@ ; CHECK-LABEL: bcast_unfold_pcmpu_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB117_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovdqu (%rdi,%rax,8), %xmm1 ; CHECK-NEXT: vpcmpltuq %xmm0, %xmm1, %k1 -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} ; CHECK-NEXT: vmovdqu %xmm1, (%rdi,%rax,8) ; CHECK-NEXT: addq $2, %rax ; CHECK-NEXT: cmpq $1023, %rax # imm = 0x3FF @@ -4340,8 +4350,10 @@ ; CHECK-LABEL: bcast_unfold_cmp_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] -; CHECK-NEXT: vmovapd {{.*#+}} xmm1 = [3.0E+0,3.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [3.0E+0,3.0E+0] +; CHECK-NEXT: # xmm1 = mem[0,0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB123_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -3045,7 +3045,8 @@ ; KNL: # %bb.0: ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_2xi1_to_2xi64: @@ -3059,7 +3060,7 @@ ; AVX512DQNOBW: # %bb.0: ; AVX512DQNOBW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; AVX512DQNOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512DQNOBW-NEXT: retq %mask = icmp eq <2 x i8> %x, %y %1 = zext <2 x i1> %mask to <2 x i64> diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1192,8 +1192,10 @@ ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1] ; AVX512-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0] ; AVX512-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A] -; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512-NEXT: ## encoding: [0xc4,0xe2,0x79,0x59,0x0d,A,A,A,A] +; AVX512-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0xc1] ; AVX512-NEXT: retq ## encoding: [0xc3] ; ; SKX-LABEL: test45: @@ -1213,8 +1215,11 @@ ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00] ; AVX512-NEXT: vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4] ; AVX512-NEXT: ## xmm0 = xmm0[0,1,1,3] -; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A] +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1,1] +; AVX512-NEXT: ## encoding: [0xc5,0xfb,0x12,0x0d,A,A,A,A] ; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512-NEXT: ## xmm1 = mem[0,0] +; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0xc1] ; AVX512-NEXT: retq ## encoding: [0xc3] ; ; SKX-LABEL: test46: diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll --- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll @@ -370,7 +370,7 @@ define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) { ; CHECK-LABEL: fcopysignv8f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; CHECK-NEXT: retq %a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y) ret <8 x half> %a diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll @@ -761,7 +761,7 @@ define <2 x half> @test_u33tofp2(<2 x i33> %arg0) { ; CHECK-LABEL: test_u33tofp2: ; CHECK: # %bb.0: -; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = uitofp <2 x i33> %arg0 to <2 x half> diff --git a/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll b/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll --- a/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll @@ -357,7 +357,7 @@ define <8 x half> @fadd_bitcast_fneg_vec_width(<8 x half> %x, <8 x half> %y) { ; CHECK-LABEL: fadd_bitcast_fneg_vec_width: ; CHECK: # %bb.0: -; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast <8 x half> %y to <2 x i64> @@ -370,7 +370,7 @@ define <8 x half> @fsub_bitcast_fneg_vec_width(<8 x half> %x, <8 x half> %y) { ; CHECK-LABEL: fsub_bitcast_fneg_vec_width: ; CHECK: # %bb.0: -; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %bc1 = bitcast <8 x half> %y to <2 x i64> diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll --- a/llvm/test/CodeGen/X86/avx512vl-logic.ll +++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll @@ -168,7 +168,7 @@ define <2 x i64> @vpandq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpandq128: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: @@ -181,7 +181,7 @@ define <2 x i64> @vpandnq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpandnq128: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; CHECK-NEXT: vpandn %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: @@ -195,7 +195,7 @@ define <2 x i64> @vporq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vporq128: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: @@ -208,7 +208,7 @@ define <2 x i64> @vpxorq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpxorq128: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll --- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll @@ -226,11 +226,19 @@ } define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) { -; CHECK-LABEL: test10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; CHECK-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: retq +; SKX-LABEL: test10: +; SKX: # %bb.0: # %entry +; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; SKX-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test10: +; KNL: # %bb.0: # %entry +; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; KNL-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; KNL-NEXT: # xmm1 = mem[0,0] +; KNL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retq entry: %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2 %sub.i = fsub <2 x double> , %0 @@ -305,7 +313,7 @@ define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; SKX-LABEL: test13: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 +; SKX-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm3 ; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2 ; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k1} @@ -314,7 +322,9 @@ ; ; KNL-LABEL: test13: ; KNL: # %bb.0: # %entry -; KNL-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 +; KNL-NEXT: vmovddup {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0] +; KNL-NEXT: # xmm3 = mem[0,0] +; KNL-NEXT: vxorpd %xmm3, %xmm0, %xmm3 ; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2 ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k1} diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1481,7 +1481,8 @@ ; ; AVX512-INFS-LABEL: test_v2f64_interp: ; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0] +; AVX512-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] +; AVX512-INFS-NEXT: # xmm3 = mem[0,0] ; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 ; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 ; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll @@ -226,17 +226,25 @@ ; NOFMA-NEXT: .cfi_def_cfa_offset 8 ; NOFMA-NEXT: retq ; -; FMA-LABEL: f8: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f8: +; FMA-AVX1: # %bb.0: # %entry +; FMA-AVX1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq ; ; FMA4-LABEL: f8: ; FMA4: # %bb.0: # %entry ; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 ; FMA4-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; FMA4-NEXT: retq +; +; FMA-AVX512-LABEL: f8: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq entry: %3 = call double @llvm.experimental.constrained.fma.f64(double %0, double %1, double %2, metadata !"round.dynamic", @@ -301,17 +309,25 @@ ; NOFMA-NEXT: .cfi_def_cfa_offset 8 ; NOFMA-NEXT: retq ; -; FMA-LABEL: f10: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f10: +; FMA-AVX1: # %bb.0: # %entry +; FMA-AVX1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq ; ; FMA4-LABEL: f10: ; FMA4: # %bb.0: # %entry ; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; FMA4-NEXT: retq +; +; FMA-AVX512-LABEL: f10: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq entry: %3 = fneg double %0 %4 = fneg double %2 @@ -930,17 +946,25 @@ ; NOFMA-NEXT: .cfi_def_cfa_offset 8 ; NOFMA-NEXT: retq ; -; FMA-LABEL: f26: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f26: +; FMA-AVX1: # %bb.0: # %entry +; FMA-AVX1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq ; ; FMA4-LABEL: f26: ; FMA4: # %bb.0: # %entry ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 ; FMA4-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; FMA4-NEXT: retq +; +; FMA-AVX512-LABEL: f26: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq entry: %3 = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %2, metadata !"round.dynamic", @@ -1053,17 +1077,25 @@ ; NOFMA-NEXT: .cfi_def_cfa_offset 8 ; NOFMA-NEXT: retq ; -; FMA-LABEL: f28: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f28: +; FMA-AVX1: # %bb.0: # %entry +; FMA-AVX1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq ; ; FMA4-LABEL: f28: ; FMA4: # %bb.0: # %entry ; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 ; FMA4-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; FMA4-NEXT: retq +; +; FMA-AVX512-LABEL: f28: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq entry: %3 = fneg <2 x double> %0 %4 = fneg <2 x double> %2 diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll --- a/llvm/test/CodeGen/X86/fp-round.ll +++ b/llvm/test/CodeGen/X86/fp-round.ll @@ -138,7 +138,7 @@ ; AVX512-LABEL: round_f64: ; AVX512: ## %bb.0: ; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1] -; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: retq @@ -239,8 +239,8 @@ ; ; AVX512-LABEL: round_v2f64: ; AVX512: ## %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1] -; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1] +; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 ; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vroundpd $11, %xmm0, %xmm0 ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -1315,22 +1315,6 @@ ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: retl $4 -; -; X64-AVX-LABEL: TestTruncCopysign: -; X64-AVX: # %bb.0: # %entry -; X64-AVX-NEXT: cmpl $50001, %edi # imm = 0xC351 -; X64-AVX-NEXT: jl .LBB26_2 -; X64-AVX-NEXT: # %bb.1: # %if.then -; X64-AVX-NEXT: pushq %rax -; X64-AVX-NEXT: callq __trunctfdf2@PLT -; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf] -; X64-AVX-NEXT: # xmm1 = mem[0,0] -; X64-AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: callq __extenddftf2@PLT -; X64-AVX-NEXT: addq $8, %rsp -; X64-AVX-NEXT: .LBB26_2: # %cleanup -; X64-AVX-NEXT: retq entry: %cmp = icmp sgt i32 %n, 50000 br i1 %cmp, label %if.then, label %cleanup diff --git a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll --- a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll +++ b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll @@ -31,7 +31,7 @@ ; GFNIAVX512: # %bb.0: ; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm2 ; GFNIAVX512-NEXT: vpsrlw $5, %xmm1, %xmm0 -; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 ; GFNIAVX512-NEXT: retq %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) ret <16 x i8> %res diff --git a/llvm/test/CodeGen/X86/gfni-rotates.ll b/llvm/test/CodeGen/X86/gfni-rotates.ll --- a/llvm/test/CodeGen/X86/gfni-rotates.ll +++ b/llvm/test/CodeGen/X86/gfni-rotates.ll @@ -32,7 +32,7 @@ ; GFNIAVX512: # %bb.0: ; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm1 ; GFNIAVX512-NEXT: vpsrlw $5, %xmm0, %xmm0 -; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; GFNIAVX512-NEXT: retq %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> ) ret <16 x i8> %res diff --git a/llvm/test/CodeGen/X86/i64-to-float.ll b/llvm/test/CodeGen/X86/i64-to-float.ll --- a/llvm/test/CodeGen/X86/i64-to-float.ll +++ b/llvm/test/CodeGen/X86/i64-to-float.ll @@ -366,16 +366,16 @@ ; ; X64-AVX512F-LABEL: clamp_sitofp_2i64_2f64: ; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX512F-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512F-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512F-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; X64-AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512DQ-LABEL: clamp_sitofp_2i64_2f64: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX512DQ-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; X64-AVX512DQ-NEXT: vcvtqq2pd %xmm0, %xmm0 ; X64-AVX512DQ-NEXT: retq %clo = icmp slt <2 x i64> %a, diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll @@ -2499,8 +2499,8 @@ ; AVX512VL-LABEL: truncstore_v2i64_v2i32: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1 -; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vpmovqd %xmm0, (%rdi) {%k1} ; AVX512VL-NEXT: retq ; @@ -2681,8 +2681,8 @@ ; AVX512BWVL-LABEL: truncstore_v2i64_v2i16: ; AVX512BWVL: # %bb.0: ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1 -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpmovqw %xmm0, (%rdi) {%k1} ; AVX512BWVL-NEXT: retq %a = icmp ne <2 x i64> %mask, zeroinitializer @@ -2850,8 +2850,8 @@ ; AVX512BWVL-LABEL: truncstore_v2i64_v2i8: ; AVX512BWVL: # %bb.0: ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1 -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpmovqb %xmm0, (%rdi) {%k1} ; AVX512BWVL-NEXT: retq %a = icmp ne <2 x i64> %mask, zeroinitializer diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll @@ -2178,7 +2178,7 @@ ; AVX512VL-LABEL: truncstore_v2i64_v2i32: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1 -; AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vpmovqd %xmm0, (%rdi) {%k1} ; AVX512VL-NEXT: retq ; @@ -2340,7 +2340,7 @@ ; AVX512BWVL-LABEL: truncstore_v2i64_v2i16: ; AVX512BWVL: # %bb.0: ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1 -; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpmovqw %xmm0, (%rdi) {%k1} ; AVX512BWVL-NEXT: retq %a = icmp ne <2 x i64> %mask, zeroinitializer @@ -2489,7 +2489,7 @@ ; AVX512BWVL-LABEL: truncstore_v2i64_v2i8: ; AVX512BWVL: # %bb.0: ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1 -; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpmovqb %xmm0, (%rdi) {%k1} ; AVX512BWVL-NEXT: retq %a = icmp ne <2 x i64> %mask, zeroinitializer diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll --- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll @@ -1018,7 +1018,7 @@ ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 ; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm2 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm1 @@ -1054,7 +1054,7 @@ ; AVX512BW-FALLBACK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-FALLBACK-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 ; AVX512BW-FALLBACK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512BW-FALLBACK-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512BW-FALLBACK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 ; AVX512BW-FALLBACK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm1 @@ -1241,7 +1241,7 @@ ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm2 ; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm1 @@ -1277,7 +1277,7 @@ ; AVX512BW-FALLBACK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-FALLBACK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ; AVX512BW-FALLBACK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512BW-FALLBACK-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512BW-FALLBACK-NEXT: vpminuq %zmm1, %zmm0, %zmm2 ; AVX512BW-FALLBACK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm1 @@ -1465,7 +1465,7 @@ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 ; AVX512F-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512F-NEXT: vpminsq %zmm0, %zmm1, %zmm2 ; AVX512F-NEXT: vpmaxsq %zmm0, %zmm1, %zmm0 @@ -1502,7 +1502,7 @@ ; AVX512BW-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 ; AVX512BW-FALLBACK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ; AVX512BW-FALLBACK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512BW-FALLBACK-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512BW-FALLBACK-NEXT: vpminsq %zmm0, %zmm1, %zmm2 ; AVX512BW-FALLBACK-NEXT: vpmaxsq %zmm0, %zmm1, %zmm0 @@ -1688,7 +1688,7 @@ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 ; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 ; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm2 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm1 @@ -1725,7 +1725,7 @@ ; AVX512BW-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1 ; AVX512BW-FALLBACK-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 ; AVX512BW-FALLBACK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512BW-FALLBACK-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512BW-FALLBACK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 ; AVX512BW-FALLBACK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm1 @@ -1916,7 +1916,7 @@ ; AVX512F-NEXT: vmovdqa (%rsi), %xmm1 ; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 ; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm2 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm1 @@ -1954,7 +1954,7 @@ ; AVX512BW-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1 ; AVX512BW-FALLBACK-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 ; AVX512BW-FALLBACK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] ; AVX512BW-FALLBACK-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} ; AVX512BW-FALLBACK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 ; AVX512BW-FALLBACK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm1 diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -2118,8 +2118,7 @@ ; KNL-LABEL: allones_v2i64_and1: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1] -; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $3, %al ; KNL-NEXT: sete %al @@ -2128,7 +2127,7 @@ ; ; SKX-LABEL: allones_v2i64_and1: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: cmpb $3, %al ; SKX-NEXT: sete %al @@ -2160,8 +2159,7 @@ ; KNL-LABEL: allzeros_v2i64_and1: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1] -; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $3, %al ; KNL-NEXT: sete %al @@ -2170,7 +2168,7 @@ ; ; SKX-LABEL: allzeros_v2i64_and1: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 ; SKX-NEXT: kortestb %k0, %k0 ; SKX-NEXT: sete %al ; SKX-NEXT: retq @@ -3407,8 +3405,7 @@ ; KNL-LABEL: allones_v2i64_and4: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $3, %al ; KNL-NEXT: sete %al @@ -3417,7 +3414,7 @@ ; ; SKX-LABEL: allones_v2i64_and4: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: cmpb $3, %al ; SKX-NEXT: sete %al @@ -3449,8 +3446,7 @@ ; KNL-LABEL: allzeros_v2i64_and4: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $3, %al ; KNL-NEXT: sete %al @@ -3459,7 +3455,7 @@ ; ; SKX-LABEL: allzeros_v2i64_and4: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 ; SKX-NEXT: kortestb %k0, %k0 ; SKX-NEXT: sete %al ; SKX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -120,15 +120,25 @@ ; SSE-NEXT: paddq %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: mul_v2i64c: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: mul_v2i64c: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: mul_v2i64c: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [117,117] +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: retq entry: %A = mul <2 x i64> %i, < i64 117, i64 117 > ret <2 x i64> %A diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -1217,8 +1217,11 @@ ; AVX512F-LABEL: v2i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm2 -; AVX512F-NEXT: vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX512F-NEXT: vblendvpd %xmm2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807] +; AVX512F-NEXT: # xmm3 = mem[0,0] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX512F-NEXT: # xmm4 = mem[0,0] +; AVX512F-NEXT: vblendvpd %xmm2, %xmm3, %xmm4, %xmm3 ; AVX512F-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0 ; AVX512F-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm0 @@ -1232,8 +1235,8 @@ ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm2, %k2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; AVX512BW-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; AVX512BW-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} ; AVX512BW-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll --- a/llvm/test/CodeGen/X86/sat-add.ll +++ b/llvm/test/CodeGen/X86/sat-add.ll @@ -667,8 +667,8 @@ ; ; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512-NEXT: retq %c = icmp ult <2 x i64> %x, %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> @@ -736,8 +736,8 @@ ; ; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512-NEXT: retq %a = add <2 x i64> %x, %c = icmp ugt <2 x i64> %x, %a @@ -803,8 +803,8 @@ ; ; AVX512-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512-NEXT: retq %a = add <2 x i64> %x, %c = icmp ugt <2 x i64> %x, diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll --- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -851,13 +851,23 @@ ; SSE-NEXT: divsd %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: div_sqrt_fabs_f64: -; AVX: # %bb.0: -; AVX-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vmulsd %xmm1, %xmm2, %xmm1 -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: div_sqrt_fabs_f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vmulsd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: div_sqrt_fabs_f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovddup {{.*#+}} xmm3 = [NaN,NaN] +; AVX512-NEXT: # xmm3 = mem[0,0] +; AVX512-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vandpd %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vmulsd %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq %s = call fast double @llvm.sqrt.f64(double %z) %a = call fast double @llvm.fabs.f64(double %y) %m = fmul fast double %s, %a @@ -1048,13 +1058,22 @@ ; SSE-NEXT: movupd %xmm1, (%rdi) ; SSE-NEXT: retq ; -; AVX-LABEL: sqrt_simplify_before_recip_vec: -; AVX: # %bb.0: -; AVX-NEXT: vsqrtpd %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,1.0E+0] -; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vmovupd %xmm1, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: sqrt_simplify_before_recip_vec: +; AVX1: # %bb.0: +; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,1.0E+0] +; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vmovupd %xmm1, (%rdi) +; AVX1-NEXT: retq +; +; AVX512-LABEL: sqrt_simplify_before_recip_vec: +; AVX512: # %bb.0: +; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm1 +; AVX512-NEXT: vmovupd %xmm1, (%rdi) +; AVX512-NEXT: retq %sqrt = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) %rsqrt = fdiv fast <2 x double> , %sqrt %sqrt_fast = fdiv fast <2 x double> %x, %sqrt diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -1316,8 +1316,11 @@ ; AVX512F-NEXT: vpsubq %xmm1, %xmm0, %xmm1 ; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; AVX512F-NEXT: vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX512F-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775807,9223372036854775807] +; AVX512F-NEXT: # xmm2 = mem[0,0] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX512F-NEXT: # xmm3 = mem[0,0] +; AVX512F-NEXT: vblendvpd %xmm1, %xmm2, %xmm3, %xmm2 ; AVX512F-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 ; AVX512F-NEXT: retq ; @@ -1329,8 +1332,8 @@ ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 ; AVX512BW-NEXT: kxorw %k1, %k0, %k1 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm2, %k2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; AVX512BW-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; AVX512BW-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} ; AVX512BW-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} ; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -257,11 +257,11 @@ ; ; CHECK-AVX512VL-LABEL: t3_wide: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411] +; CHECK-AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411] ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; CHECK-AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpaddq %xmm0, %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll --- a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll +++ b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll @@ -32,7 +32,7 @@ define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind { ; CHECK-LABEL: v2f64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; CHECK-NEXT: retq %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b ) ret <2 x double> %tmp diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -7,15 +7,35 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VLDQ define <2 x double> @fabs_v2f64(<2 x double> %p) { -; X86-LABEL: fabs_v2f64: -; X86: # %bb.0: -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-NEXT: retl +; X86-AVX-LABEL: fabs_v2f64: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: retl ; -; X64-LABEL: fabs_v2f64: -; X64: # %bb.0: -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-NEXT: retq +; X86-AVX512VL-LABEL: fabs_v2f64: +; X86-AVX512VL: # %bb.0: +; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0 +; X86-AVX512VL-NEXT: retl +; +; X86-AVX512VLDQ-LABEL: fabs_v2f64: +; X86-AVX512VLDQ: # %bb.0: +; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0 +; X86-AVX512VLDQ-NEXT: retl +; +; X64-AVX-LABEL: fabs_v2f64: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: retq +; +; X64-AVX512VL-LABEL: fabs_v2f64: +; X64-AVX512VL: # %bb.0: +; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: retq +; +; X64-AVX512VLDQ-LABEL: fabs_v2f64: +; X64-AVX512VLDQ: # %bb.0: +; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512VLDQ-NEXT: retq %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p) ret <2 x double> %t } diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -615,10 +615,14 @@ ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX512F-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: # xmm2 = mem[0,0] +; AVX512F-NEXT: vsubpd %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX512F-NEXT: retq ; @@ -626,10 +630,10 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX512VL-NEXT: retq ; @@ -3288,10 +3292,14 @@ ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX512F-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: # xmm2 = mem[0,0] +; AVX512F-NEXT: vsubpd %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX512F-NEXT: retq ; @@ -3300,10 +3308,10 @@ ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX512VL-NEXT: retq ; @@ -5684,15 +5692,17 @@ ; ; AVX512F-LABEL: PR43609: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] ; AVX512F-NEXT: vpor %xmm4, %xmm3, %xmm3 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] ; AVX512F-NEXT: vpor %xmm5, %xmm0, %xmm0 -; AVX512F-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: # xmm6 = mem[0,0] ; AVX512F-NEXT: vsubpd %xmm6, %xmm0, %xmm0 ; AVX512F-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] @@ -5701,7 +5711,8 @@ ; AVX512F-NEXT: vpor %xmm5, %xmm1, %xmm1 ; AVX512F-NEXT: vsubpd %xmm6, %xmm1, %xmm1 ; AVX512F-NEXT: vaddpd %xmm1, %xmm2, %xmm1 -; AVX512F-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512F-NEXT: # xmm2 = mem[0,0] ; AVX512F-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ; AVX512F-NEXT: vmovupd %xmm0, (%rdi) @@ -5710,15 +5721,16 @@ ; ; AVX512VL-LABEL: PR43609: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] ; AVX512VL-NEXT: vpor %xmm4, %xmm3, %xmm3 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] ; AVX512VL-NEXT: vpor %xmm5, %xmm0, %xmm0 -; AVX512VL-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512VL-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512VL-NEXT: # xmm6 = mem[0,0] ; AVX512VL-NEXT: vsubpd %xmm6, %xmm0, %xmm0 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] @@ -5727,7 +5739,8 @@ ; AVX512VL-NEXT: vpor %xmm5, %xmm1, %xmm1 ; AVX512VL-NEXT: vsubpd %xmm6, %xmm1, %xmm1 ; AVX512VL-NEXT: vaddpd %xmm1, %xmm2, %xmm1 -; AVX512VL-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VL-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VL-NEXT: # xmm2 = mem[0,0] ; AVX512VL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovupd %xmm0, (%rdi) @@ -5737,10 +5750,12 @@ ; AVX512DQ-LABEL: PR43609: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512DQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX512DQ-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm1, %zmm1 -; AVX512DQ-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512DQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512DQ-NEXT: # xmm2 = mem[0,0] ; AVX512DQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ; AVX512DQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ; AVX512DQ-NEXT: vmovupd %xmm0, (%rdi) @@ -5750,10 +5765,11 @@ ; ; AVX512VLDQ-LABEL: PR43609: ; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512VLDQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 ; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm1, %xmm1 -; AVX512VLDQ-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VLDQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VLDQ-NEXT: # xmm2 = mem[0,0] ; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ; AVX512VLDQ-NEXT: vmovupd %xmm0, (%rdi) diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -470,10 +470,21 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v16i8: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v16i8: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v16i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v16i8: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ret <16 x i8> %b } @@ -549,11 +560,24 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v8i16: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v8i16: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v8i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v8i16: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ret <8 x i16> %b } @@ -634,11 +658,24 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v4i32: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v4i32: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v4i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v4i32: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ret <4 x i32> %b } @@ -721,11 +758,24 @@ ; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: retq ; -; GFNIAVX-LABEL: test_bitreverse_v2i64: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v2i64: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v2i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v2i64: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) ret <2 x i64> %b } diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -32,11 +32,19 @@ ; CHECK-NEXT: divpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fdiv_v2f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] -; AVX-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fdiv_v2f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] +; AVX1-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fdiv_v2f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [1.0E+1,1.0E+1] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq entry: %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64( <2 x double> , @@ -95,14 +103,25 @@ ; CHECK-NEXT: wait ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fdiv_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] -; AVX-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fdiv_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] +; AVX1-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fdiv_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [1.0E+1,1.0E+1] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: retq entry: %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64( <3 x double> , @@ -429,11 +448,18 @@ ; CHECK-NEXT: mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fmul_v2f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fmul_v2f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fmul_v2f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq entry: %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( <2 x double> , @@ -489,14 +515,24 @@ ; CHECK-NEXT: wait ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fmul_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fmul_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fmul_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: retq entry: %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64( <3 x double> @llvm.experimental.constrained.fadd.v2f64( <2 x double> , @@ -627,14 +670,24 @@ ; CHECK-NEXT: wait ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fadd_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fadd_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fadd_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: retq entry: %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64( <3 x double> @llvm.experimental.constrained.fsub.v2f64( <2 x double> , @@ -767,15 +827,26 @@ ; CHECK-NEXT: wait ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fsub_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fsub_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fsub_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: retq entry: %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64( <3 x double> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v8i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq $1, %zmm1, %zmm1 ; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 @@ -437,7 +437,7 @@ ; ; AVX512VL-LABEL: splatvar_funnnel_v8i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq $1, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 @@ -448,7 +448,7 @@ ; ; AVX512BW-LABEL: splatvar_funnnel_v8i64: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq $1, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 @@ -465,7 +465,7 @@ ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq $1, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1834,7 +1834,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: @@ -1850,7 +1850,7 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: @@ -1866,7 +1866,7 @@ ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -95,7 +95,7 @@ ; ; AVX512F-LABEL: var_funnnel_v2i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -106,7 +106,7 @@ ; ; AVX512VL-LABEL: var_funnnel_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -117,7 +117,7 @@ ; ; AVX512BW-LABEL: var_funnnel_v2i64: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -138,7 +138,7 @@ ; ; AVX512VLBW-LABEL: var_funnnel_v2i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1054,7 +1054,7 @@ ; ; AVX512F-LABEL: splatvar_funnnel_v2i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1065,7 +1065,7 @@ ; ; AVX512VL-LABEL: splatvar_funnnel_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1076,7 +1076,7 @@ ; ; AVX512BW-LABEL: splatvar_funnnel_v2i64: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1097,7 +1097,7 @@ ; ; AVX512VLBW-LABEL: splatvar_funnnel_v2i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -2317,7 +2317,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2 ; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: @@ -2342,14 +2342,14 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2 ; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -803,7 +803,7 @@ ; ; AVX512F-LABEL: splatvar_funnnel_v4i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -814,7 +814,7 @@ ; ; AVX512VL-LABEL: splatvar_funnnel_v4i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -825,7 +825,7 @@ ; ; AVX512BW-LABEL: splatvar_funnnel_v4i64: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -845,7 +845,7 @@ ; ; AVX512VLBW-LABEL: splatvar_funnnel_v4i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -424,7 +424,7 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v8i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -435,7 +435,7 @@ ; ; AVX512VL-LABEL: splatvar_funnnel_v8i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -446,7 +446,7 @@ ; ; AVX512BW-LABEL: splatvar_funnnel_v8i64: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -464,7 +464,7 @@ ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i64: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1880,7 +1880,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: @@ -1896,7 +1896,7 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: @@ -1912,7 +1912,7 @@ ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll --- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll @@ -16982,14 +16982,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -17086,7 +17087,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -17094,7 +17095,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -17269,14 +17270,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -17287,7 +17289,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -17296,7 +17299,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -17455,7 +17458,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -17463,7 +17466,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -17474,7 +17477,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -17484,7 +17487,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -17641,14 +17644,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -17659,7 +17663,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -17668,7 +17673,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -17827,7 +17832,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -17835,7 +17840,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -17846,7 +17851,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -17856,7 +17861,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -18013,14 +18018,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -18031,7 +18037,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -18040,7 +18047,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -18199,7 +18206,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -18207,7 +18214,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -18218,7 +18225,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -18228,7 +18235,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -18385,14 +18392,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -18403,7 +18411,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -18412,7 +18421,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -18571,7 +18580,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -18579,7 +18588,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -18590,7 +18599,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -18600,7 +18609,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -18757,14 +18766,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -18775,7 +18785,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -18784,7 +18795,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -18943,7 +18954,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -18951,7 +18962,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -18962,7 +18973,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -18972,7 +18983,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -19129,14 +19140,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -19147,7 +19159,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -19156,7 +19169,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -19315,7 +19328,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -19323,7 +19336,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -19334,7 +19347,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -19344,7 +19357,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -19501,14 +19514,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -19519,7 +19533,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -19528,7 +19543,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -19687,7 +19702,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -19695,7 +19710,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -19706,7 +19721,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -19716,7 +19731,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -19873,14 +19888,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -19891,7 +19907,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -19900,7 +19917,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -20059,7 +20076,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -20067,7 +20084,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -20078,7 +20095,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -20088,7 +20105,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -20245,14 +20262,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -20263,7 +20281,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -20272,7 +20291,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -20431,7 +20450,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -20439,7 +20458,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -20450,7 +20469,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -20460,7 +20479,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -20617,14 +20636,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -20635,7 +20655,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -20644,7 +20665,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -20803,7 +20824,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -20811,7 +20832,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -20822,7 +20843,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -20832,7 +20853,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -20989,14 +21010,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -21007,7 +21029,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -21016,7 +21039,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -21175,7 +21198,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -21183,7 +21206,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -21194,7 +21217,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -21204,7 +21227,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -21361,14 +21384,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -21379,7 +21403,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -21388,7 +21413,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -21547,7 +21572,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -21555,7 +21580,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -21566,7 +21591,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -21576,7 +21601,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -21733,14 +21758,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -21751,7 +21777,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -21760,7 +21787,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -21919,7 +21946,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -21927,7 +21954,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -21938,7 +21965,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -21948,7 +21975,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -22105,14 +22132,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -22123,7 +22151,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -22132,7 +22161,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -22291,7 +22320,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -22299,7 +22328,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -22310,7 +22339,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -22320,7 +22349,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -22477,14 +22506,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -22495,7 +22525,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -22504,7 +22535,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -22663,7 +22694,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -22671,7 +22702,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -22682,7 +22713,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -22692,7 +22723,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -22849,14 +22880,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -22867,7 +22899,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -22876,7 +22909,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -23035,7 +23068,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -23043,7 +23076,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -23054,7 +23087,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -23064,7 +23097,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -23221,14 +23254,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -23239,7 +23273,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -23248,7 +23283,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -23407,7 +23442,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -23415,7 +23450,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -23426,7 +23461,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -23436,7 +23471,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -23593,14 +23628,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -23611,7 +23647,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -23620,7 +23657,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -23779,7 +23816,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -23787,7 +23824,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -23798,7 +23835,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -23808,7 +23845,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -23965,14 +24002,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -23983,7 +24021,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -23992,7 +24031,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -24151,7 +24190,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -24159,7 +24198,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -24170,7 +24209,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -24180,7 +24219,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -24337,14 +24376,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -24355,7 +24395,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -24364,7 +24405,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -24523,7 +24564,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -24531,7 +24572,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -24542,7 +24583,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -24552,7 +24593,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -24709,14 +24750,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -24727,7 +24769,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -24736,7 +24779,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -24895,7 +24938,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -24903,7 +24946,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -24914,7 +24957,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -24924,7 +24967,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -25081,14 +25124,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -25099,7 +25143,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -25108,7 +25153,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -25267,7 +25312,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -25275,7 +25320,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -25286,7 +25331,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -25296,7 +25341,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -25453,14 +25498,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -25471,7 +25517,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -25480,7 +25527,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -25639,7 +25686,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -25647,7 +25694,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -25658,7 +25705,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -25668,7 +25715,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -25825,14 +25872,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -25843,7 +25891,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -25852,7 +25901,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -26011,7 +26060,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -26019,7 +26068,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -26030,7 +26079,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -26040,7 +26089,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -26197,14 +26246,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -26215,7 +26265,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -26224,7 +26275,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -26383,7 +26434,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -26391,7 +26442,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -26402,7 +26453,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -26412,7 +26463,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -26569,14 +26620,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -26587,7 +26639,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -26596,7 +26649,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -26755,7 +26808,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -26763,7 +26816,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -26774,7 +26827,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -26784,7 +26837,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -26941,14 +26994,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -26959,7 +27013,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -26968,7 +27023,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -27127,7 +27182,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -27135,7 +27190,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -27146,7 +27201,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -27156,7 +27211,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -27313,14 +27368,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -27331,7 +27387,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -27340,7 +27397,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -27499,7 +27556,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -27507,7 +27564,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_30_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -27518,7 +27575,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -27528,7 +27585,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -27685,14 +27742,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -27703,7 +27761,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -27712,7 +27771,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -27871,7 +27930,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -27879,7 +27938,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -27890,7 +27949,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -27900,7 +27959,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -28057,14 +28116,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -28075,7 +28135,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -28084,7 +28145,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -28243,7 +28304,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -28251,7 +28312,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -28262,7 +28323,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -28272,7 +28333,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -28429,14 +28490,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -28447,7 +28509,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -28456,7 +28519,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -28615,7 +28678,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -28623,7 +28686,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -28634,7 +28697,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -28644,7 +28707,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -28801,14 +28864,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -28819,7 +28883,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -28828,7 +28893,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -28987,7 +29052,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -28995,7 +29060,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -29006,7 +29071,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -29016,7 +29081,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -29173,14 +29238,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -29191,7 +29257,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -29200,7 +29267,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -29359,7 +29426,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -29367,7 +29434,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -29378,7 +29445,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -29388,7 +29455,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -29545,14 +29612,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -29563,7 +29631,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -29572,7 +29641,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -29731,7 +29800,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -29739,7 +29808,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -29750,7 +29819,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -29760,7 +29829,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -29917,14 +29986,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -29935,7 +30005,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -29944,7 +30015,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -30103,7 +30174,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -30111,7 +30182,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -30122,7 +30193,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -30132,7 +30203,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -30289,14 +30360,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -30307,7 +30379,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -30316,7 +30389,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -30475,7 +30548,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -30483,7 +30556,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -30494,7 +30567,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -30504,7 +30577,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -30661,14 +30734,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -30679,7 +30753,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -30688,7 +30763,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -30847,7 +30922,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -30855,7 +30930,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -30866,7 +30941,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -30876,7 +30951,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -31033,14 +31108,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -31051,7 +31127,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -31060,7 +31137,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -31219,7 +31296,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -31227,7 +31304,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -31238,7 +31315,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -31248,7 +31325,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -31405,14 +31482,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -31423,7 +31501,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -31432,7 +31511,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -31591,7 +31670,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -31599,7 +31678,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -31610,7 +31689,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -31620,7 +31699,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -31777,14 +31856,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -31795,7 +31875,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -31804,7 +31885,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -31963,7 +32044,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -31971,7 +32052,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -31982,7 +32063,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -31992,7 +32073,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -32149,14 +32230,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -32167,7 +32249,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -32176,7 +32259,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -32335,7 +32418,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -32343,7 +32426,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -32354,7 +32437,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -32364,7 +32447,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -32521,14 +32604,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -32539,7 +32623,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -32548,7 +32633,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -32707,7 +32792,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -32715,7 +32800,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -32726,7 +32811,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -32736,7 +32821,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -32893,14 +32978,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -32911,7 +32997,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -32920,7 +33007,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -33079,7 +33166,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -33087,7 +33174,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -33098,7 +33185,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -33108,7 +33195,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -33265,14 +33352,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -33283,7 +33371,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -33292,7 +33381,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -33451,7 +33540,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -33459,7 +33548,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -33470,7 +33559,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -33480,7 +33569,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -33637,14 +33726,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -33655,7 +33745,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -33664,7 +33755,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -33823,7 +33914,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -33831,7 +33922,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -33842,7 +33933,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -33852,7 +33943,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -34009,14 +34100,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -34027,7 +34119,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -34036,7 +34129,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -34195,7 +34288,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -34203,7 +34296,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -34214,7 +34307,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -34224,7 +34317,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -34381,14 +34474,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -34399,7 +34493,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -34408,7 +34503,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -34567,7 +34662,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -34575,7 +34670,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -34586,7 +34681,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -34596,7 +34691,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -34753,14 +34848,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -34771,7 +34867,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -34780,7 +34877,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -34939,7 +35036,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -34947,7 +35044,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -34958,7 +35055,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -34968,7 +35065,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -35125,14 +35222,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -35143,7 +35241,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -35152,7 +35251,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -35311,7 +35410,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -35319,7 +35418,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -35330,7 +35429,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -35340,7 +35439,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -35497,14 +35596,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -35515,7 +35615,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -35524,7 +35625,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -35683,7 +35784,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -35691,7 +35792,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -35702,7 +35803,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -35712,7 +35813,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -35869,14 +35970,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -35887,7 +35989,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -35896,7 +35999,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -36055,7 +36158,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -36063,7 +36166,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -36074,7 +36177,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -36084,7 +36187,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -36241,14 +36344,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -36259,7 +36363,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -36268,7 +36373,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -36427,7 +36532,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -36435,7 +36540,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -36446,7 +36551,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -36456,7 +36561,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -36613,14 +36718,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -36631,7 +36737,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -36640,7 +36747,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -36799,7 +36906,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -36807,7 +36914,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -36818,7 +36925,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -36828,7 +36935,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -36985,14 +37092,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -37003,7 +37111,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -37012,7 +37121,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -37171,7 +37280,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -37179,7 +37288,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -37190,7 +37299,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -37200,7 +37309,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -37357,14 +37466,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -37375,7 +37485,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -37384,7 +37495,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -37543,7 +37654,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -37551,7 +37662,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -37562,7 +37673,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -37572,7 +37683,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -37729,14 +37840,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -37747,7 +37859,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -37756,7 +37869,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -37915,7 +38028,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -37923,7 +38036,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -37934,7 +38047,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -37944,7 +38057,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -38101,14 +38214,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -38119,7 +38233,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -38128,7 +38243,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -38287,7 +38402,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -38295,7 +38410,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -38306,7 +38421,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -38316,7 +38431,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -38473,14 +38588,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -38491,7 +38607,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -38500,7 +38617,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -38659,7 +38776,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -38667,7 +38784,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -38678,7 +38795,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -38688,7 +38805,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -38845,14 +38962,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -38863,7 +38981,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -38872,7 +38991,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -39031,7 +39150,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -39039,7 +39158,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -39050,7 +39169,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -39060,7 +39179,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -39217,14 +39336,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -39235,7 +39355,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -39244,7 +39365,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -39403,7 +39524,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -39411,7 +39532,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -39422,7 +39543,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -39432,7 +39553,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -39589,14 +39710,15 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -39607,7 +39729,8 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; @@ -39616,7 +39739,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq @@ -39775,7 +39898,7 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq @@ -39783,7 +39906,7 @@ ; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq @@ -39794,7 +39917,7 @@ ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq @@ -39804,7 +39927,7 @@ ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll --- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll @@ -746,14 +746,16 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; ; AVX512VPOPCNTDQVL-LABEL: eq_1_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; ; BITALG_NOVLX-LABEL: eq_1_v2i64: @@ -879,7 +881,8 @@ ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper @@ -888,7 +891,8 @@ ; AVX512VPOPCNTDQVL-LABEL: ne_1_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -836,11 +836,24 @@ ; SSE41-NEXT: sete %al ; SSE41-NEXT: retq ; -; AVX-LABEL: trunc_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; AVX1-LABEL: trunc_v2i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX1-NEXT: sete %al +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_v2i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX2-NEXT: sete %al +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_v2i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535] +; AVX512-NEXT: vptest %xmm1, %xmm0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: retq %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0) %2 = trunc i64 %1 to i16 %3 = icmp eq i16 %2, 0 @@ -1028,12 +1041,27 @@ ; SSE41-NEXT: sete %al ; SSE41-NEXT: retq ; -; AVX-LABEL: PR44781: -; AVX: # %bb.0: -; AVX-NEXT: vmovdqu (%rdi), %xmm0 -; AVX-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; AVX1-LABEL: PR44781: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqu (%rdi), %xmm0 +; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX1-NEXT: sete %al +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR44781: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqu (%rdi), %xmm0 +; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; AVX2-NEXT: sete %al +; AVX2-NEXT: retq +; +; AVX512-LABEL: PR44781: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64424509455,64424509455] +; AVX512-NEXT: vptest %xmm1, %xmm0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: retq %2 = bitcast %struct.Box* %0 to <4 x i32>* %3 = load <4 x i32>, <4 x i32>* %2, align 4 %4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %3) diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -1783,7 +1783,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v16i8: @@ -1799,7 +1799,7 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_rotate_v16i8: @@ -1815,7 +1815,7 @@ ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_rotate_v16i8: @@ -2081,7 +2081,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; @@ -2098,7 +2098,7 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; @@ -2115,7 +2115,7 @@ ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -964,7 +964,8 @@ ; AVX512-LABEL: splatvar_modulo_shift_v2i64: ; AVX512: # %bb.0: ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper @@ -972,7 +973,7 @@ ; ; AVX512VL-LABEL: splatvar_modulo_shift_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -1055,14 +1055,15 @@ ; AVX512-LABEL: splatvar_modulo_shift_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v4i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll --- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll @@ -233,7 +233,8 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind { ; ALL-LABEL: splatvar_modulo_shift_v8i64: ; ALL: # %bb.0: -; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 ; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %mod = and <8 x i64> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -789,13 +789,14 @@ ; ; AVX512-LABEL: splatvar_modulo_shift_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -853,13 +853,14 @@ ; ; AVX512-LABEL: splatvar_modulo_shift_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v4i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll --- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll @@ -188,7 +188,8 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind { ; ALL-LABEL: splatvar_modulo_shift_v8i64: ; ALL: # %bb.0: -; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 ; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %mod = and <8 x i64> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -696,13 +696,14 @@ ; ; AVX512-LABEL: splatvar_modulo_shift_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -778,13 +778,14 @@ ; ; AVX512-LABEL: splatvar_modulo_shift_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v4i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll @@ -181,7 +181,8 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind { ; ALL-LABEL: splatvar_modulo_shift_v8i64: ; ALL: # %bb.0: -; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63] +; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 ; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %mod = and <8 x i64> %b, diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll --- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll @@ -140,7 +140,7 @@ ; AVX512CDVL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX512CDVL-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vplzcntq %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX512CDVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64] ; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm0 ; AVX512CDVL-NEXT: retq ; @@ -150,7 +150,7 @@ ; AVX512CD-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX512CD-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0 -; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX512CD-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64] ; AVX512CD-NEXT: vpsubq %xmm0, %xmm1, %xmm0 ; AVX512CD-NEXT: vzeroupper ; AVX512CD-NEXT: retq @@ -341,7 +341,7 @@ ; AVX512CDVL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX512CDVL-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vplzcntq %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX512CDVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64] ; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm0 ; AVX512CDVL-NEXT: retq ; @@ -351,7 +351,7 @@ ; AVX512CD-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX512CD-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0 -; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX512CD-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64] ; AVX512CD-NEXT: vpsubq %xmm0, %xmm1, %xmm0 ; AVX512CD-NEXT: vzeroupper ; AVX512CD-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll --- a/llvm/test/CodeGen/X86/vselect-pcmp.ll +++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll @@ -644,8 +644,7 @@ ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 +; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1 ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper @@ -653,7 +652,7 @@ ; ; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 ; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1} ; AVX512VL-NEXT: retq ;