diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9503,7 +9503,8 @@ // For size optimization, also splat v2f64 and v2i64, and for size opt // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. - if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || + if (ScalarSize == 32 || + (ScalarSize == 64 && (IsGE256 || Subtarget.hasAVX512())) || (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1519,3 +1519,3 @@ -; X64-AVX512VL-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4] -; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4] +; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll --- a/llvm/test/CodeGen/X86/avx512-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-arith.ll @@ -296 +296 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8086,8086] @@ -306 +306 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8086,8086] @@ -316 +316 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8086,8086] @@ -327,2 +327 @@ -; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086] -; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 @@ -335 +334 @@ -; SKX-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -180 +180 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] @@ -587 +587,2 @@ -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3,3] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3,3] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -789 +790,2 @@ -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -1006 +1008,2 @@ -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [NaN,NaN] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [NaN,NaN] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -1211 +1214,2 @@ -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -1413 +1417,2 @@ -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [3.0E+0,3.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3.0E+0,3.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -1620 +1625,2 @@ -; CHECK-NEXT: 
vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -1868 +1874,2 @@ -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -1903 +1910,2 @@ -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -2189 +2197,2 @@ -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -2403 +2412,2 @@ -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] @@ -2614 +2624 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] @@ -2822 +2832 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] @@ -3030 +3040 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] @@ -3238 +3248 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] @@ -3452 +3462 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] @@ -3458 +3468 @@ -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} @@ -3671 +3681 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] @@ -3677 +3687 @@ -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} @@ -3893 +3903 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] @@ -3899 +3909 @@ -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} @@ -4118 +4128 @@ -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,2] +; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [2,2] @@ -4124 +4134 @@ -; CHECK-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1} @@ -4343,2 +4353,4 @@ -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] -; CHECK-NEXT: vmovapd {{.*#+}} xmm1 = [3.0E+0,3.0E+0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0] +; CHECK-NEXT: # xmm0 = mem[0,0] +; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [3.0E+0,3.0E+0] +; CHECK-NEXT: # xmm1 = mem[0,0] diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -3048 +3048,2 @@ -; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 @@ -3062 +3063 @@ -; AVX512DQNOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1195,2 +1195,4 @@ -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A] -; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: 
reloc_riprel_4byte +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512-NEXT: ## encoding: [0xc4,0xe2,0x79,0x59,0x0d,A,A,A,A] +; AVX512-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0xc1] @@ -1216 +1218,2 @@ -; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A] +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1,1] +; AVX512-NEXT: ## encoding: [0xc5,0xfb,0x12,0x0d,A,A,A,A] @@ -1217,0 +1221,2 @@ +; AVX512-NEXT: ## xmm1 = mem[0,0] +; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0xc1] diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll --- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll @@ -373 +373 @@ -; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll @@ -764 +764 @@ -; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll b/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll --- a/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll @@ -360 +360 @@ -; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 @@ -373 +373 @@ -; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll --- a/llvm/test/CodeGen/X86/avx512vl-logic.ll +++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll @@ -171 +171 @@ -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -184 +184 @@ -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -198 +198 @@ -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -211 +211 @@ -; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll --- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll +++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll @@ -229,5 +229,13 @@ -; CHECK-LABEL: test10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; CHECK-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: retq +; SKX-LABEL: test10: +; SKX: # %bb.0: # %entry +; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; SKX-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; SKX-NEXT: retq +; +; KNL-LABEL: test10: +; KNL: # %bb.0: # %entry +; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; KNL-NEXT: 
vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; KNL-NEXT: # xmm1 = mem[0,0] +; KNL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retq @@ -308 +316 @@ -; SKX-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 +; SKX-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm3 @@ -317 +325,3 @@ -; KNL-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 +; KNL-NEXT: vmovddup {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0] +; KNL-NEXT: # xmm3 = mem[0,0] +; KNL-NEXT: vxorpd %xmm3, %xmm0, %xmm3 diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1484 +1484,2 @@ -; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0] +; AVX512-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] +; AVX512-INFS-NEXT: # xmm3 = mem[0,0] diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics-fma.ll @@ -229,5 +229,5 @@ -; FMA-LABEL: f8: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f8: +; FMA-AVX1: # %bb.0: # %entry +; FMA-AVX1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq @@ -239,0 +240,8 @@ +; +; FMA-AVX512-LABEL: f8: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq @@ -304,5 +312,5 @@ -; FMA-LABEL: f10: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f10: +; FMA-AVX1: # %bb.0: # %entry +; FMA-AVX1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq @@ -314,0 +323,8 @@ +; +; FMA-AVX512-LABEL: f10: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq @@ -933,5 +949,5 @@ -; FMA-LABEL: f26: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f26: +; FMA-AVX1: # %bb.0: # %entry +; FMA-AVX1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq @@ -943,0 +960,8 @@ +; +; FMA-AVX512-LABEL: f26: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq @@ -1056,5 +1080,5 @@ -; FMA-LABEL: f28: -; FMA: # %bb.0: # %entry -; FMA-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 -; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-NEXT: retq +; FMA-AVX1-LABEL: f28: +; FMA-AVX1: 
# %bb.0: # %entry +; FMA-AVX1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-AVX1-NEXT: retq @@ -1066,0 +1091,8 @@ +; +; FMA-AVX512-LABEL: f28: +; FMA-AVX512: # %bb.0: # %entry +; FMA-AVX512-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 +; FMA-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; FMA-AVX512-NEXT: # xmm1 = mem[0,0] +; FMA-AVX512-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; FMA-AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll --- a/llvm/test/CodeGen/X86/fp-round.ll +++ b/llvm/test/CodeGen/X86/fp-round.ll @@ -141 +141 @@ -; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 @@ -242,2 +242,2 @@ -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1] -; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1] +; AVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -2,7 +2,7 @@ -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefix=X64-SSE -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefix=X64-SSE -; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+mmx | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+avx | FileCheck %s --check-prefix=X64-AVX -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X64-AVX -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefix=X64-AVX -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=X64-AVX +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefixes=X64-SSE +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefixes=X64-SSE +; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+mmx | FileCheck %s --check-prefixes=X32 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512 @@ -1319,15 +1319,33 @@ -; X64-AVX-LABEL: TestTruncCopysign: -; X64-AVX: # %bb.0: # %entry -; X64-AVX-NEXT: cmpl $50001, %edi # imm = 0xC351 -; X64-AVX-NEXT: jl .LBB26_2 -; X64-AVX-NEXT: # %bb.1: # %if.then -; X64-AVX-NEXT: pushq %rax -; X64-AVX-NEXT: callq __trunctfdf2@PLT -; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf] -; X64-AVX-NEXT: # xmm1 = mem[0,0] -; X64-AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: callq __extenddftf2@PLT -; X64-AVX-NEXT: addq $8, %rsp -; X64-AVX-NEXT: .LBB26_2: # %cleanup -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: TestTruncCopysign: +; X64-AVX1: # %bb.0: # %entry +; X64-AVX1-NEXT: cmpl $50001, %edi # imm 
= 0xC351 +; X64-AVX1-NEXT: jl .LBB26_2 +; X64-AVX1-NEXT: # %bb.1: # %if.then +; X64-AVX1-NEXT: pushq %rax +; X64-AVX1-NEXT: callq __trunctfdf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf] +; X64-AVX1-NEXT: # xmm1 = mem[0,0] +; X64-AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extenddftf2@PLT +; X64-AVX1-NEXT: addq $8, %rsp +; X64-AVX1-NEXT: .LBB26_2: # %cleanup +; X64-AVX1-NEXT: retq +; +; X64-AVX512-LABEL: TestTruncCopysign: +; X64-AVX512: # %bb.0: # %entry +; X64-AVX512-NEXT: cmpl $50001, %edi # imm = 0xC351 +; X64-AVX512-NEXT: jl .LBB26_2 +; X64-AVX512-NEXT: # %bb.1: # %if.then +; X64-AVX512-NEXT: pushq %rax +; X64-AVX512-NEXT: callq __trunctfdf2@PLT +; X64-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] +; X64-AVX512-NEXT: # xmm1 = mem[0,0] +; X64-AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf] +; X64-AVX512-NEXT: # xmm1 = mem[0,0] +; X64-AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: callq __extenddftf2@PLT +; X64-AVX512-NEXT: addq $8, %rsp +; X64-AVX512-NEXT: .LBB26_2: # %cleanup +; X64-AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll --- a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll +++ b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll @@ -34 +34 @@ -; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/gfni-rotates.ll b/llvm/test/CodeGen/X86/gfni-rotates.ll --- a/llvm/test/CodeGen/X86/gfni-rotates.ll +++ b/llvm/test/CodeGen/X86/gfni-rotates.ll @@ -35 +35 @@ -; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/i64-to-float.ll b/llvm/test/CodeGen/X86/i64-to-float.ll --- a/llvm/test/CodeGen/X86/i64-to-float.ll +++ b/llvm/test/CodeGen/X86/i64-to-float.ll @@ -369,2 +369,2 @@ -; X64-AVX512F-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX512F-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512F-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512F-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -377,2 +377,2 @@ -; X64-AVX512DQ-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX512DQ-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll @@ -2502,2 +2502,2 @@ -; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -2684,2 +2684,2 @@ -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminsq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -2853,2 +2853,2 @@ -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll @@ -2181 +2181 @@ -; AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -2343 +2343 @@ -; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -2492 +2492 @@ -; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll --- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll @@ -1021 +1021 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1057 +1057 @@ -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1244 +1244 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1280 +1280 @@ -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1468 +1468 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1505 +1505 @@ -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1691 +1691 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1728 +1728 @@ -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1919 +1919 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] @@ -1957 +1957 @@ -; AVX512BW-FALLBACK-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1] +; AVX512BW-FALLBACK-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -2121,2 +2121 @@ -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1] -; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 @@ -2131 +2130 @@ -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 @@ -2163,2 +2162 @@ -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1] -; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 @@ -2173 +2171 @@ -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 @@ -3410,2 +3408 @@ -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = 
[4,4] -; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 @@ -3420 +3417 @@ -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 @@ -3452,2 +3449 @@ -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0 @@ -3462 +3458 @@ -; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0 +; SKX-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0 diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -123,9 +123,19 @@ -; AVX-LABEL: mul_v2i64c: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 -; AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: mul_v2i64c: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: mul_v2i64c: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [117,117] +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -1220,2 +1220,5 @@ -; AVX512F-NEXT: vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; AVX512F-NEXT: vblendvpd %xmm2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807] +; AVX512F-NEXT: # xmm3 = mem[0,0] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX512F-NEXT: # xmm4 = mem[0,0] +; AVX512F-NEXT: vblendvpd %xmm2, %xmm3, %xmm4, %xmm3 @@ -1235,2 +1238,2 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; AVX512BW-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; AVX512BW-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll --- a/llvm/test/CodeGen/X86/sat-add.ll +++ b/llvm/test/CodeGen/X86/sat-add.ll @@ -670,2 +670,2 @@ -; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -739,2 +739,2 @@ -; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -806,2 +806,2 @@ -; 
AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll --- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -854,7 +854,17 @@ -; AVX-LABEL: div_sqrt_fabs_f64: -; AVX: # %bb.0: -; AVX-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vmulsd %xmm1, %xmm2, %xmm1 -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: div_sqrt_fabs_f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vmulsd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: div_sqrt_fabs_f64: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovddup {{.*#+}} xmm3 = [NaN,NaN] +; AVX512-NEXT: # xmm3 = mem[0,0] +; AVX512-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vandpd %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vmulsd %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq @@ -1051,7 +1061,16 @@ -; AVX-LABEL: sqrt_simplify_before_recip_vec: -; AVX: # %bb.0: -; AVX-NEXT: vsqrtpd %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,1.0E+0] -; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm1 -; AVX-NEXT: vmovupd %xmm1, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: sqrt_simplify_before_recip_vec: +; AVX1: # %bb.0: +; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,1.0E+0] +; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vmovupd %xmm1, (%rdi) +; AVX1-NEXT: retq +; +; AVX512-LABEL: sqrt_simplify_before_recip_vec: +; AVX512: # %bb.0: +; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm1 +; AVX512-NEXT: vmovupd %xmm1, (%rdi) +; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -1319,2 +1319,5 @@ -; AVX512F-NEXT: vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX512F-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775807,9223372036854775807] +; AVX512F-NEXT: # xmm2 = mem[0,0] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] +; AVX512F-NEXT: # xmm3 = mem[0,0] +; AVX512F-NEXT: vblendvpd %xmm1, %xmm2, %xmm3, %xmm2 @@ -1332,2 +1335,2 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] -; AVX512BW-NEXT: vmovdqa64 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] +; AVX512BW-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2} diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -260 +260 @@ -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411] +; CHECK-AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = 
[12297829382473034411,12297829382473034411] @@ -264 +264 @@ -; CHECK-AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll --- a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll +++ b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll @@ -35 +35 @@ -; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -10,4 +10,4 @@ -; X86-LABEL: fabs_v2f64: -; X86: # %bb.0: -; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-NEXT: retl +; X86-AVX-LABEL: fabs_v2f64: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: retl @@ -15,4 +15,24 @@ -; X64-LABEL: fabs_v2f64: -; X64: # %bb.0: -; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-NEXT: retq +; X86-AVX512VL-LABEL: fabs_v2f64: +; X86-AVX512VL: # %bb.0: +; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0 +; X86-AVX512VL-NEXT: retl +; +; X86-AVX512VLDQ-LABEL: fabs_v2f64: +; X86-AVX512VLDQ: # %bb.0: +; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0 +; X86-AVX512VLDQ-NEXT: retl +; +; X64-AVX-LABEL: fabs_v2f64: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: retq +; +; X64-AVX512VL-LABEL: fabs_v2f64: +; X64-AVX512VL: # %bb.0: +; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: retq +; +; X64-AVX512VLDQ-LABEL: fabs_v2f64: +; X64-AVX512VLDQ: # %bb.0: +; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512VLDQ-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -618 +618,2 @@ -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 @@ -620,2 +621,5 @@ -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX512F-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: # xmm2 = mem[0,0] +; AVX512F-NEXT: vsubpd %xmm2, %xmm0, %xmm0 @@ -629 +633 @@ -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 @@ -631,2 +635,2 @@ -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -3291 +3295,2 @@ -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200] +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 
@@ -3293,2 +3298,5 @@ -; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072] +; AVX512F-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: # xmm2 = mem[0,0] +; AVX512F-NEXT: vsubpd %xmm2, %xmm0, %xmm0 @@ -3303 +3311 @@ -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 @@ -3305,2 +3313,2 @@ -; AVX512VL-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -5687 +5695,2 @@ -; AVX512F-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm1 @@ -5690 +5699 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] @@ -5693 +5702 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] @@ -5695 +5704,2 @@ -; AVX512F-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: # xmm6 = mem[0,0] @@ -5704 +5714,2 @@ -; AVX512F-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512F-NEXT: # xmm2 = mem[0,0] @@ -5713 +5724 @@ -; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 @@ -5716 +5727 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] @@ -5719 +5730 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] @@ -5721 +5732,2 @@ -; AVX512VL-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512VL-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512VL-NEXT: # xmm6 = mem[0,0] @@ -5730 +5742,2 @@ -; AVX512VL-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VL-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VL-NEXT: # xmm2 = mem[0,0] @@ -5740 +5753,2 @@ -; AVX512DQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX512DQ-NEXT: vpaddq %xmm1, %xmm0, %xmm1 @@ -5743 +5757,2 @@ -; AVX512DQ-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512DQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512DQ-NEXT: # xmm2 = mem[0,0] @@ -5753 +5768 @@ -; AVX512VLDQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512VLDQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1 @@ -5756 +5771,2 @@ -; AVX512VLDQ-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VLDQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] +; AVX512VLDQ-NEXT: # xmm2 = 
mem[0,0] diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -473,4 +473,15 @@ -; GFNIAVX-LABEL: test_bitreverse_v16i8: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v16i8: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v16i8: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v16i8: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq @@ -552,5 +563,18 @@ -; GFNIAVX-LABEL: test_bitreverse_v8i16: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v8i16: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v8i16: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v8i16: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq @@ -637,5 +661,18 @@ -; GFNIAVX-LABEL: test_bitreverse_v4i32: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v4i32: +; GFNIAVX1: # %bb.0: +; GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v4i32: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v4i32: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq @@ -724,5 +761,18 @@ -; GFNIAVX-LABEL: test_bitreverse_v2i64: -; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] -; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; GFNIAVX-NEXT: retq +; GFNIAVX1-LABEL: test_bitreverse_v2i64: +; GFNIAVX1: # %bb.0: +; 
GFNIAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: retq +; +; GFNIAVX2-LABEL: test_bitreverse_v2i64: +; GFNIAVX2: # %bb.0: +; GFNIAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX2-NEXT: retq +; +; GFNIAVX512-LABEL: test_bitreverse_v2i64: +; GFNIAVX512: # %bb.0: +; GFNIAVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] +; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0 +; GFNIAVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -35,5 +35,13 @@ -; AVX-LABEL: constrained_vector_fdiv_v2f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] -; AVX-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fdiv_v2f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] +; AVX1-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fdiv_v2f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [1.0E+1,1.0E+1] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq @@ -98,8 +106,19 @@ -; AVX-LABEL: constrained_vector_fdiv_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] -; AVX-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fdiv_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] +; AVX1-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fdiv_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [1.0E+1,1.0E+1] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: retq @@ -432,5 +451,12 @@ -; AVX-LABEL: constrained_vector_fmul_v2f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fmul_v2f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fmul_v2f64: 
+; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq @@ -492,8 +518,18 @@ -; AVX-LABEL: constrained_vector_fmul_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fmul_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fmul_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: retq @@ -569,5 +605,12 @@ -; AVX-LABEL: constrained_vector_fadd_v2f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fadd_v2f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fadd_v2f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq @@ -630,8 +673,18 @@ -; AVX-LABEL: constrained_vector_fadd_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fadd_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fadd_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; 
AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: retq @@ -707,5 +760,12 @@ -; AVX-LABEL: constrained_vector_fsub_v2f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fsub_v2f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fsub_v2f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vmovddup {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX512-NEXT: # xmm0 = mem[0,0] +; AVX512-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq @@ -770,9 +830,20 @@ -; AVX-LABEL: constrained_vector_fsub_v3f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fsub_v3f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fsub_v3f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX512-NEXT: # xmm1 = mem[0,0] +; AVX512-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -98 +98 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -109 +109 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -120 +120 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -141 +141 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -968 +968 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -979 +979 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -990 +990 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -1011 +1011 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -2368 +2368 @@ -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 @@ -2393 +2393 @@ -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 @@ -2400 +2400 @@ -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -775 +775 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -786 +786 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -797 +797 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -817 +817 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -429 +429 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -440 +440 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -451 +451 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -468 +468 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1837 +1837 @@ -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 @@ -1853 +1853 @@ -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 @@ -1869 +1869 @@ -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -98 +98 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -109 +109 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -120 +120 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -141 +141 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -1057 +1057 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -1068 +1068 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -1079 +1079 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; 
AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -1100 +1100 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -2320 +2320 @@ -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 @@ -2345 +2345 @@ -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 @@ -2352 +2352 @@ -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -806 +806 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -817 +817 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -828 +828 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -848 +848 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -427 +427 @@ -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -438 +438 @@ -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -449 +449 @@ -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] @@ -467 +467 @@ -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] +; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63] diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1883 +1883 @@ -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 @@ -1899 +1899 @@ -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 @@ -1915 +1915 @@ -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll --- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll @@ -16985 +16985,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -16992 +16993 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17089 +17090 @@ -; AVX512VPOPCNTDQ-NEXT: 
vmovdqa {{.*#+}} xmm1 = [2,2] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] @@ -17097 +17098 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17272 +17273,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -17279 +17281 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17290 +17292,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -17299 +17302 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17458 +17461 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] @@ -17466 +17469 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17477 +17480 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] @@ -17487 +17490 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17644 +17647,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -17651 +17655 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17662 +17666,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -17671 +17676 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17830 +17835 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] @@ -17838 +17843 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -17849 +17854 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] @@ -17859 +17864 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18016 +18021,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -18023 +18029 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18034 +18040,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -18043 +18050 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18202 +18209 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] @@ -18210 +18217 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18221 +18228 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] @@ -18231 +18238 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18388 +18395,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -18395 +18403 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18406 +18414,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -18415 +18424 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18574 +18583 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] @@ -18582 +18591 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18593 +18602 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] @@ -18603 +18612 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18760 +18769,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -18767 +18777 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18778 +18788,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -18787 +18798 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18946 +18957 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] @@ -18954 +18965 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -18965 +18976 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] @@ -18975 +18986 @@ -; BITALG-NEXT: vpcmpltuq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19132 +19143,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -19139 +19151 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19150 +19162,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -19159 +19172 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19318 +19331 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] @@ -19326 +19339 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19337 +19350 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] @@ -19347 +19360 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19504 +19517,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -19511 +19525 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19522 +19536,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -19531 +19546 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19690 +19705 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] @@ -19698 +19713 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19709 +19724 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] @@ -19719 +19734 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19876 +19891,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -19883 +19899 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -19894 +19910,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -19903 +19920 @@ -; BITALG-NEXT: vpcmpnleuq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20062 +20079 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] @@ -20070 +20087 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20081 +20098 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] @@ -20091 +20108 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20248 +20265,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -20255 +20273 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20266 +20284,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -20275 +20294 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20434 +20453 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] @@ -20442 +20461 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20453 +20472 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] @@ -20463 +20482 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20620 +20639,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -20627 +20647 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20638 +20658,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -20647 +20668 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20806 +20827 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] @@ -20814 +20835 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20825 +20846 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] @@ -20835 +20856 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -20992 
+21013,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -20999 +21021 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21010 +21032,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -21019 +21042 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21178 +21201 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] @@ -21186 +21209 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21197 +21220 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] @@ -21207 +21230 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21364 +21387,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -21371 +21395 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21382 +21406,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -21391 +21416 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21550 +21575 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] @@ -21558 +21583 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21569 +21594 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] @@ -21579 +21604 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21736 +21761,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -21743 +21769 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21754 +21780,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -21763 +21790 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21922 +21949 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] @@ -21930 +21957 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -21941 +21968 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] @@ -21951 +21978 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22108 +22135,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -22115 +22143 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22126 +22154,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -22135 +22164 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22294 +22323 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] @@ -22302 +22331 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22313 +22342 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] @@ -22323 +22352 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22480 +22509,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -22487 +22517 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22498 +22528,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -22507 +22538 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22666 +22697 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] @@ -22674 +22705 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22685 +22716 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] @@ -22695 +22726 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22852 +22883,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -22859 +22891 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -22870 +22902,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -22879 +22912 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23038 +23071 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] @@ -23046 +23079 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23057 +23090 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] @@ -23067 +23100 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23224 +23257,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -23231 +23265 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23242 +23276,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -23251 +23286 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23410 +23445 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] @@ -23418 +23453 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23429 +23464 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] @@ -23439 +23474 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23596 +23631,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -23603 +23639 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23614 +23650,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -23623 +23660 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23782 +23819 @@ -; 
AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] @@ -23790 +23827 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23801 +23838 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] @@ -23811 +23848 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23968 +24005,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -23975 +24013 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -23986 +24024,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -23995 +24034 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24154 +24193 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] @@ -24162 +24201 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24173 +24212 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] @@ -24183 +24222 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24340 +24379,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -24347 +24387 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24358 +24398,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -24367 +24408 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24526 +24567 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] @@ -24534 +24575 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24545 +24586 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] @@ -24555 +24596 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24712 +24753,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 
= [22,22] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -24719 +24761 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24730 +24772,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -24739 +24782 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24898 +24941 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] @@ -24906 +24949 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -24917 +24960 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] @@ -24927 +24970 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25084 +25127,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -25091 +25135 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25102 +25146,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -25111 +25156 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25270 +25315 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] @@ -25278 +25323 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25289 +25334 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] @@ -25299 +25344 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25456 +25501,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -25463 +25509 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25474 +25520,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -25483 +25530 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25642 +25689 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} 
xmm1 = [25,25] @@ -25650 +25697 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25661 +25708 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] @@ -25671 +25718 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25828 +25875,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -25835 +25883 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -25846 +25894,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -25855 +25904 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26014 +26063 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] @@ -26022 +26071 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26033 +26082 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] @@ -26043 +26092 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26200 +26249,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -26207 +26257 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26218 +26268,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -26227 +26278 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26386 +26437 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] @@ -26394 +26445 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26405 +26456 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] @@ -26415 +26466 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26572 +26623,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -26579 +26631 @@ -; 
AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26590 +26642,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -26599 +26652 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26758 +26811 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] @@ -26766 +26819 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26777 +26830 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] @@ -26787 +26840 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26944 +26997,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -26951 +27005 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -26962 +27016,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -26971 +27026 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27130 +27185 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] @@ -27138 +27193 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27149 +27204 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] @@ -27159 +27214 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27316 +27371,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -27323 +27379 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27334 +27390,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -27343 +27400 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27502 +27559 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] @@ -27510 +27567 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27521 +27578 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] @@ -27531 +27588 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27688 +27745,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -27695 +27753 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27706 +27764,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -27715 +27774 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27874 +27933 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] @@ -27882 +27941 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -27893 +27952 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] @@ -27903 +27962 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28060 +28119,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -28067 +28127 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28078 +28138,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -28087 +28148 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28246 +28307 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] @@ -28254 +28315 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28265 +28326 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] @@ -28275 +28336 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28432 +28493,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -28439 +28501 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; 
AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28450 +28512,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -28459 +28522 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28618 +28681 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] @@ -28626 +28689 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28637 +28700 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] @@ -28647 +28710 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28804 +28867,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -28811 +28875 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28822 +28886,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -28831 +28896 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -28990 +29055 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] @@ -28998 +29063 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29009 +29074 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] @@ -29019 +29084 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29176 +29241,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -29183 +29249 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29194 +29260,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -29203 +29270 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29362 +29429 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] @@ -29370 +29437 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29381 +29448 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] @@ -29391 +29458 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29548 +29615,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -29555 +29623 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29566 +29634,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -29575 +29644 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29734 +29803 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] @@ -29742 +29811 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29753 +29822 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] @@ -29763 +29832 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29920 +29989,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -29927 +29997 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -29938 +30008,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -29947 +30018 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30106 +30177 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] @@ -30114 +30185 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30125 +30196 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] @@ -30135 +30206 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30292 +30363,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -30299 +30371 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 
@@ -30310 +30382,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -30319 +30392 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30478 +30551 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] @@ -30486 +30559 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30497 +30570 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] @@ -30507 +30580 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30664 +30737,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -30671 +30745 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30682 +30756,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -30691 +30766 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30850 +30925 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] @@ -30858 +30933 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -30869 +30944 @@ -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] @@ -30879 +30954 @@ -; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -31036 +31111,2 @@ -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -31043 +31119 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -31054 +31130,2 @@ -; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39] +; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 @@ -31063 +31140 @@ -; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -31222 +31299 @@ -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40] @@ -31230 +31307 @@ -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 @@ -31241 +31318 @@ -; BITALG_NOVLX-NEXT: vmovdqa 
{{.*#+}} xmm1 = [40,40]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40]
@@ -31251 +31328 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31408 +31485,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -31415 +31493 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31426 +31504,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -31435 +31514 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31594 +31673 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
@@ -31602 +31681 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31613 +31692 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
@@ -31623 +31702 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31780 +31859,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -31787 +31867 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31798 +31878,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -31807 +31888 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31966 +32047 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
@@ -31974 +32055 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -31985 +32066 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
@@ -31995 +32076 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32152 +32233,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -32159 +32241 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32170 +32252,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -32179 +32262 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32338 +32421 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
@@ -32346 +32429 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32357 +32440 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
@@ -32367 +32450 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32524 +32607,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -32531 +32615 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32542 +32626,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -32551 +32636 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32710 +32795 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
@@ -32718 +32803 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32729 +32814 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
@@ -32739 +32824 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32896 +32981,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -32903 +32989 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -32914 +33000,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -32923 +33010 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33082 +33169 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
@@ -33090 +33177 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33101 +33188 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
@@ -33111 +33198 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33268 +33355,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -33275 +33363 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33286 +33374,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -33295 +33384 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33454 +33543 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
@@ -33462 +33551 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33473 +33562 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
@@ -33483 +33572 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33640 +33729,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -33647 +33737 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33658 +33748,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -33667 +33758 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33826 +33917 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
@@ -33834 +33925 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -33845 +33936 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
@@ -33855 +33946 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34012 +34103,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -34019 +34111 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34030 +34122,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -34039 +34132 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34198 +34291 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
@@ -34206 +34299 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34217 +34310 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
@@ -34227 +34320 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34384 +34477,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -34391 +34485 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34402 +34496,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -34411 +34506 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34570 +34665 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
@@ -34578 +34673 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34589 +34684 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
@@ -34599 +34694 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34756 +34851,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -34763 +34859 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34774 +34870,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -34783 +34880 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34942 +35039 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
@@ -34950 +35047 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -34961 +35058 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
@@ -34971 +35068 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35128 +35225,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -35135 +35233 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35146 +35244,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -35155 +35254 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35314 +35413 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
@@ -35322 +35421 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35333 +35432 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
@@ -35343 +35442 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35500 +35599,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -35507 +35607 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35518 +35618,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -35527 +35628 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35686 +35787 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
@@ -35694 +35795 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35705 +35806 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
@@ -35715 +35816 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35872 +35973,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -35879 +35981 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -35890 +35992,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -35899 +36002 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36058 +36161 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
@@ -36066 +36169 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36077 +36180 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
@@ -36087 +36190 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36244 +36347,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -36251 +36355 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36262 +36366,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -36271 +36376 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36430 +36535 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
@@ -36438 +36543 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36449 +36554 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
@@ -36459 +36564 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36616 +36721,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -36623 +36729 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36634 +36740,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -36643 +36750 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36802 +36909 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
@@ -36810 +36917 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36821 +36928 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
@@ -36831 +36938 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -36988 +37095,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -36995 +37103 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37006 +37114,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -37015 +37124 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37174 +37283 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
@@ -37182 +37291 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37193 +37302 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
@@ -37203 +37312 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37360 +37469,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -37367 +37477 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37378 +37488,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -37387 +37498 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37546 +37657 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
@@ -37554 +37665 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37565 +37676 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
@@ -37575 +37686 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37732 +37843,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -37739 +37851 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37750 +37862,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -37759 +37872 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37918 +38031 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
@@ -37926 +38039 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -37937 +38050 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
@@ -37947 +38060 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38104 +38217,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -38111 +38225 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38122 +38236,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -38131 +38246 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38290 +38405 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
@@ -38298 +38413 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38309 +38424 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
@@ -38319 +38434 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38476 +38591,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -38483 +38599 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38494 +38610,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -38503 +38620 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38662 +38779 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
@@ -38670 +38787 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38681 +38798 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
@@ -38691 +38808 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38848 +38965,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -38855 +38973 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -38866 +38984,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -38875 +38994 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39034 +39153 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
@@ -39042 +39161 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39053 +39172 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
@@ -39063 +39182 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39220 +39339,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -39227 +39347 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39238 +39358,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -39247 +39368 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39406 +39527 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
@@ -39414 +39535 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39425 +39546 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
@@ -39435 +39556 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39592 +39713,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
+; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -39599 +39721 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39610 +39732,2 @@
-; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
+; BITALG_NOVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -39619 +39742 @@
-; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39778 +39901 @@
-; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63]
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63]
@@ -39786 +39909 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
@@ -39797 +39920 @@
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63]
+; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63]
@@ -39807 +39930 @@
-; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -749 +749,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -756 +757,2 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -882 +884,2 @@
-; AVX512VPOPCNTDQ-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -891 +894,2 @@
-; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -839,5 +839,18 @@
-; AVX-LABEL: trunc_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; AVX-NEXT: sete %al
-; AVX-NEXT: retq
+; AVX1-LABEL: trunc_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX1-NEXT: sete %al
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
+; AVX512-NEXT: vptest %xmm1, %xmm0
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: retq
@@ -1031,6 +1044,21 @@
-; AVX-LABEL: PR44781:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqu (%rdi), %xmm0
-; AVX-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; AVX-NEXT: sete %al
-; AVX-NEXT: retq
+; AVX1-LABEL: PR44781:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX1-NEXT: sete %al
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR44781:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; AVX2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR44781:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqu (%rdi), %xmm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64424509455,64424509455]
+; AVX512-NEXT: vptest %xmm1, %xmm0
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -1786 +1786 @@
-; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
@@ -1802 +1802 @@
-; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
@@ -1818 +1818 @@
-; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
@@ -2084 +2084 @@
-; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
@@ -2101 +2101 @@
-; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
@@ -2118 +2118 @@
-; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -967 +967,2 @@
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
@@ -975 +976 @@
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1058 +1058,2 @@
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
@@ -1065 +1066 @@
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -236 +236,2 @@
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -792 +792,2 @@
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
@@ -798 +799 @@
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -856 +856,2 @@
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
@@ -862 +863 @@
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -191 +191,2 @@
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -699 +699,2 @@
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
@@ -705 +706 @@
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -781 +781,2 @@
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
@@ -787 +788 @@
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -184 +184,2 @@
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
--- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
@@ -143 +143 @@
-; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64]
+; AVX512CDVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64]
@@ -153 +153 @@
-; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64]
+; AVX512CD-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64]
@@ -344 +344 @@
-; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64]
+; AVX512CDVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64]
@@ -354 +354 @@
-; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64]
+; AVX512CD-NEXT: vpbroadcastq {{.*#+}} xmm1 = [64,64]
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -647,2 +647 @@
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1
+; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
@@ -656 +655 @@
-; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
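
For reference, a minimal LLVM IR reproducer for the v2i64 splat-constant lowering exercised by the tests above (an illustrative sketch, not a test from this patch; the function name, RUN line, and expected assembly are assumptions):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s

; Hypothetical reproducer: adds a splatted 64-bit constant to a v2i64.
; With AVX512VL the [42,42] operand is expected to come from a single
; 8-byte constant-pool entry, either materialized with vpbroadcastq or
; folded as an embedded-broadcast memory operand, e.g. (assumption):
;   vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; rather than a full 16-byte vmovdqa constant load.
define <2 x i64> @splat_v2i64_add(<2 x i64> %x) {
  %r = add <2 x i64> %x, <i64 42, i64 42>
  ret <2 x i64> %r
}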