diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9481,6 +9481,7 @@
 
   unsigned ScalarSize = Ld.getValueSizeInBits();
   bool IsGE256 = (VT.getSizeInBits() >= 256);
+  bool IsLE256 = (VT.getSizeInBits() <= 256);
 
   // When optimizing for size, generate up to 5 extra bytes for a broadcast
   // instruction to save 8 or more bytes of constant pool data.
@@ -9504,7 +9505,9 @@
     // with AVX2, also splat i8 and i16.
     // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
     if (ScalarSize == 32 ||
-        (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
+        (ScalarSize == 64 &&
+         (IsGE256 || Subtarget.hasVLX() ||
+          (IsLE256 && Subtarget.hasAVX2() && !Subtarget.hasAVX512()))) ||
         (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) ||
         (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
       const Constant *C = nullptr;
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1507,9 +1507,9 @@
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [4,4]
-; X64-AVX-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
-; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [4,4]
+; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A]
+; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
 ; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll
--- a/llvm/test/CodeGen/X86/combine-movmsk.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
 
 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
@@ -238,11 +238,20 @@
 ; SSE42-NEXT:    xorl $3, %eax
 ; SSE42-NEXT:    retq
 ;
-; AVX-LABEL: movmskpd_pow2_mask:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    xorl $3, %eax
-; AVX-NEXT:    retq
+; AVX1-LABEL: movmskpd_pow2_mask:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovmskpd %xmm0, %eax
+; AVX1-NEXT:    xorl $3, %eax
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: movmskpd_pow2_mask:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovmskpd %xmm0, %eax
+; AVX2-NEXT:    retq
   %1 = and <2 x i64> %a0, <i64 -9223372036854775808, i64 -9223372036854775808>
   %2 = icmp eq <2 x i64> %1, zeroinitializer
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -258,6 +267,22 @@
 ; SSE-NEXT:    movmskps %xmm0, %eax
 ; SSE-NEXT:    xorl $15, %eax
 ; SSE-NEXT:    retq
+;
+; AVX1-LABEL: movmskps_pow2_mask:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpslld $29, %xmm0, %xmm0
+; AVX1-NEXT:    vmovmskps %xmm0, %eax
+; AVX1-NEXT:    xorl $15, %eax
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: movmskps_pow2_mask:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovmskps %xmm0, %eax
+; AVX2-NEXT:    retq
   %1 = and <4 x i32> %a0, <i32 4, i32 4, i32 4, i32 4>
   %2 = icmp eq <4 x i32> %1, zeroinitializer
   %3 = sext <4 x i1> %2 to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll
--- a/llvm/test/CodeGen/X86/combine-mul.ll
+++ b/llvm/test/CodeGen/X86/combine-mul.ll
@@ -345,7 +345,8 @@
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
-; AVX-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm2
 ; AVX-NEXT:    vpmuludq %xmm1, %xmm2, %xmm2
 ; AVX-NEXT:    vpsrlq $32, %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/concat-cast.ll b/llvm/test/CodeGen/X86/concat-cast.ll
--- a/llvm/test/CodeGen/X86/concat-cast.ll
+++ b/llvm/test/CodeGen/X86/concat-cast.ll
@@ -373,7 +373,7 @@
 ; AVX2-LABEL: mismatch_tofp_v4i32_v4f32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15]
 ; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vsubpd %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vcvtpd2ps %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll
--- a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll
+++ b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll
@@ -74,7 +74,9 @@
 define <2 x double> @ExeDepsFix_broadcastsd(<2 x double> %arg, <2 x double> %arg2) {
 ; CHECK-LABEL: ExeDepsFix_broadcastsd:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm2 = [2147483647,2147483647]
+; CHECK-NEXT:    ## xmm2 = mem[0,0]
+; CHECK-NEXT:    vandpd %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %bitcast = bitcast <2 x double> %arg to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -317,23 +317,16 @@
 
 ; This used to crash by creating a setcc with an i64 condition on a 32-bit target.
 define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
-; X64-LABEL: extvselectsetcc_crash:
-; X64:       # %bb.0:
-; X64-NEXT:    vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; X64-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; X64-NEXT:    vandpd %xmm2, %xmm1, %xmm1
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
-; X64-NEXT:    retq
-;
-; X86-LABEL: extvselectsetcc_crash:
-; X86:       # %bb.0:
-; X86-NEXT:    vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
-; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; X86-NEXT:    vandpd %xmm2, %xmm1, %xmm1
-; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X86-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
-; X86-NEXT:    retl
+; CHECK-LABEL: extvselectsetcc_crash:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [5.0E+0,5.0E+0]
+; CHECK-NEXT:    # xmm1 = mem[0,0]
+; CHECK-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    vandpd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
+; CHECK-NEXT:    ret{{[l|q]}}
   %cmp = fcmp oeq <2 x double> %x, <double 5.0, double 5.0>
   %s = select <2 x i1> %cmp, <2 x double> <double 1.0, double undef>, <2 x double> <double 0.0, double undef>
   %r = shufflevector <2 x double> %s, <2 x double> %x, <3 x i32> <i32 0, i32 2, i32 3>
@@ -546,7 +539,9 @@
 define double @fabs_v4f64(<4 x double> %x) nounwind {
 ; X64-LABEL: fabs_v4f64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vmovddup {{.*#+}} xmm1 = [NaN,NaN]
+; X64-NEXT:    # xmm1 = mem[0,0]
+; X64-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
@@ -556,7 +551,9 @@
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT:    vmovddup {{.*#+}} xmm1 = [NaN,NaN]
+; X86-NEXT:    # xmm1 = mem[0,0]
+; X86-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vmovlps %xmm0, (%esp)
 ; X86-NEXT:    fldl (%esp)
 ; X86-NEXT:    movl %ebp, %esp
@@ -818,8 +815,12 @@
 define double @copysign_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
 ; X64-LABEL: copysign_v4f64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
+; X64-NEXT:    # xmm2 = mem[0,0]
+; X64-NEXT:    vandps %xmm2, %xmm1, %xmm1
+; X64-NEXT:    vmovddup {{.*#+}} xmm2 = [NaN,NaN]
+; X64-NEXT:    # xmm2 = mem[0,0]
+; X64-NEXT:    vandps %xmm2, %xmm0, %xmm0
 ; X64-NEXT:    vorps %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
@@ -830,8 +831,12 @@
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT:    vmovddup {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
+; X86-NEXT:    # xmm2 = mem[0,0]
+; X86-NEXT:    vandps %xmm2, %xmm1, %xmm1
+; X86-NEXT:    vmovddup {{.*#+}} xmm2 = [NaN,NaN]
+; X86-NEXT:    # xmm2 = mem[0,0]
+; X86-NEXT:    vandps %xmm2, %xmm0, %xmm0
 ; X86-NEXT:    vorps %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vmovlps %xmm0, (%esp)
 ; X86-NEXT:    fldl (%esp)
@@ -1096,7 +1101,9 @@
 define double @round_v4f64(<4 x double> %x) nounwind {
 ; X64-LABEL: round_v4f64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; X64-NEXT:    vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
+; X64-NEXT:    # xmm1 = mem[0,0]
+; X64-NEXT:    vandpd %xmm1, %xmm0, %xmm1
 ; X64-NEXT:    vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X64-NEXT:    # xmm2 = mem[0,0]
 ; X64-NEXT:    vorpd %xmm2, %xmm1, %xmm1
@@ -1111,7 +1118,9 @@
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
+; X86-NEXT:    vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0]
+; X86-NEXT:    # xmm1 = mem[0,0]
+; X86-NEXT:    vandpd %xmm1, %xmm0, %xmm1
 ; X86-NEXT:    vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X86-NEXT:    # xmm2 = mem[0,0]
 ; X86-NEXT:    vorpd %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -122,7 +122,7 @@
 ; X64-AVX2-LABEL: test_reduce_v2i64:
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; X64-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -124,7 +124,7 @@
 ; X64-AVX2-LABEL: test_reduce_v2i64:
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
--- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -425,7 +425,7 @@
 ; AVX2-LABEL: reassociate_umax_v2i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm1, %xmm2, %xmm4
 ; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm5
 ; AVX2-NEXT:    vpcmpgtq %xmm5, %xmm4, %xmm4
@@ -723,7 +723,7 @@
 ; AVX2-LABEL: reassociate_umin_v2i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm1, %xmm2, %xmm4
 ; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm5
 ; AVX2-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -2474,10 +2474,10 @@
 ; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [2147483647,2147483647]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [2147483647,2147483647]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm3
 ; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
 ; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -2613,34 +2613,63 @@
 ; SSE4-NEXT:    pextrw $1, %xmm0, 2(%rdi)
 ; SSE4-NEXT:    retq
 ;
-; AVX-LABEL: truncstore_v2i64_v2i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [32767,32767]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm4
-; AVX-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848]
-; AVX-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm4
-; AVX-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vmovmskpd %xmm1, %eax
-; AVX-NEXT:    xorl $3, %eax
-; AVX-NEXT:    testb $1, %al
-; AVX-NEXT:    jne .LBB7_1
-; AVX-NEXT:  # %bb.2: # %else
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    jne .LBB7_3
-; AVX-NEXT:  .LBB7_4: # %else2
-; AVX-NEXT:    retq
-; AVX-NEXT:  .LBB7_1: # %cond.store
-; AVX-NEXT:    vpextrw $0, %xmm0, (%rdi)
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    je .LBB7_4
-; AVX-NEXT:  .LBB7_3: # %cond.store1
-; AVX-NEXT:    vpextrw $1, %xmm0, 2(%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: truncstore_v2i64_v2i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32767,32767]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848]
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovmskpd %xmm1, %eax
+; AVX1-NEXT:    xorl $3, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    jne .LBB7_1
+; AVX1-NEXT:  # %bb.2: # %else
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    jne .LBB7_3
+; AVX1-NEXT:  .LBB7_4: # %else2
+; AVX1-NEXT:    retq
+; AVX1-NEXT:  .LBB7_1: # %cond.store
+; AVX1-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    je .LBB7_4
+; AVX1-NEXT:  .LBB7_3: # %cond.store1
+; AVX1-NEXT:    vpextrw $1, %xmm0, 2(%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: truncstore_v2i64_v2i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [32767,32767]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848]
+; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vmovmskpd %xmm1, %eax
+; AVX2-NEXT:    xorl $3, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    jne .LBB7_1
+; AVX2-NEXT:  # %bb.2: # %else
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    jne .LBB7_3
+; AVX2-NEXT:  .LBB7_4: # %else2
+; AVX2-NEXT:    retq
+; AVX2-NEXT:  .LBB7_1: # %cond.store
+; AVX2-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    je .LBB7_4
+; AVX2-NEXT:  .LBB7_3: # %cond.store1
+; AVX2-NEXT:    vpextrw $1, %xmm0, 2(%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: truncstore_v2i64_v2i16:
 ; AVX512F:       # %bb.0:
@@ -2783,33 +2812,61 @@
 ; SSE4-NEXT:    pextrb $1, %xmm2, 1(%rdi)
 ; SSE4-NEXT:    retq
 ;
-; AVX-LABEL: truncstore_v2i64_v2i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [127,127]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm4
-; AVX-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488]
-; AVX-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm4
-; AVX-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vmovmskpd %xmm1, %eax
-; AVX-NEXT:    xorl $3, %eax
-; AVX-NEXT:    testb $1, %al
-; AVX-NEXT:    jne .LBB8_1
-; AVX-NEXT:  # %bb.2: # %else
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    jne .LBB8_3
-; AVX-NEXT:  .LBB8_4: # %else2
-; AVX-NEXT:    retq
-; AVX-NEXT:  .LBB8_1: # %cond.store
-; AVX-NEXT:    vpextrb $0, %xmm0, (%rdi)
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    je .LBB8_4
-; AVX-NEXT:  .LBB8_3: # %cond.store1
-; AVX-NEXT:    vpextrb $1, %xmm0, 1(%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: truncstore_v2i64_v2i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [127,127]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488]
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovmskpd %xmm1, %eax
+; AVX1-NEXT:    xorl $3, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    jne .LBB8_1
+; AVX1-NEXT:  # %bb.2: # %else
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    jne .LBB8_3
+; AVX1-NEXT:  .LBB8_4: # %else2
+; AVX1-NEXT:    retq
+; AVX1-NEXT:  .LBB8_1: # %cond.store
+; AVX1-NEXT:    vpextrb $0, %xmm0, (%rdi)
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    je .LBB8_4
+; AVX1-NEXT:  .LBB8_3: # %cond.store1
+; AVX1-NEXT:    vpextrb $1, %xmm0, 1(%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: truncstore_v2i64_v2i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [127,127]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488]
+; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vmovmskpd %xmm1, %eax
+; AVX2-NEXT:    xorl $3, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    jne .LBB8_1
+; AVX2-NEXT:  # %bb.2: # %else
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    jne .LBB8_3
+; AVX2-NEXT:  .LBB8_4: # %else2
+; AVX2-NEXT:    retq
+; AVX2-NEXT:  .LBB8_1: # %cond.store
+; AVX2-NEXT:    vpextrb $0, %xmm0, (%rdi)
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    je .LBB8_4
+; AVX2-NEXT:  .LBB8_3: # %cond.store1
+; AVX2-NEXT:    vpextrb $1, %xmm0, 1(%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: truncstore_v2i64_v2i8:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
--- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -2154,9 +2154,11 @@
 ; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
-; AVX2-NEXT:    vmovapd {{.*#+}} xmm2 = [4294967295,4294967295]
-; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103]
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm2 = [4294967295,4294967295]
+; AVX2-NEXT:    # xmm2 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm3, %xmm0, %xmm3
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103]
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
 ; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -2273,33 +2275,63 @@
 ; SSE4-NEXT:    pextrw $1, %xmm0, 2(%rdi)
 ; SSE4-NEXT:    retq
 ;
-; AVX-LABEL: truncstore_v2i64_v2i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vmovapd {{.*#+}} xmm3 = [65535,65535]
-; AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
-; AVX-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
-; AVX-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vmovmskpd %xmm1, %eax
-; AVX-NEXT:    xorl $3, %eax
-; AVX-NEXT:    testb $1, %al
-; AVX-NEXT:    jne .LBB7_1
-; AVX-NEXT:  # %bb.2: # %else
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    jne .LBB7_3
-; AVX-NEXT:  .LBB7_4: # %else2
-; AVX-NEXT:    retq
-; AVX-NEXT:  .LBB7_1: # %cond.store
-; AVX-NEXT:    vpextrw $0, %xmm0, (%rdi)
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    je .LBB7_4
-; AVX-NEXT:  .LBB7_3: # %cond.store1
-; AVX-NEXT:    vpextrw $1, %xmm0, 2(%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: truncstore_v2i64_v2i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovapd {{.*#+}} xmm3 = [65535,65535]
+; AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovmskpd %xmm1, %eax
+; AVX1-NEXT:    xorl $3, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    jne .LBB7_1
+; AVX1-NEXT:  # %bb.2: # %else
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    jne .LBB7_3
+; AVX1-NEXT:  .LBB7_4: # %else2
+; AVX1-NEXT:    retq
+; AVX1-NEXT:  .LBB7_1: # %cond.store
+; AVX1-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    je .LBB7_4
+; AVX1-NEXT:  .LBB7_3: # %cond.store1
+; AVX1-NEXT:    vpextrw $1, %xmm0, 2(%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: truncstore_v2i64_v2i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm3 = [65535,65535]
+; AVX2-NEXT:    # xmm3 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm4, %xmm0, %xmm4
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
+; AVX2-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vmovmskpd %xmm1, %eax
+; AVX2-NEXT:    xorl $3, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    jne .LBB7_1
+; AVX2-NEXT:  # %bb.2: # %else
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    jne .LBB7_3
+; AVX2-NEXT:  .LBB7_4: # %else2
+; AVX2-NEXT:    retq
+; AVX2-NEXT:  .LBB7_1: # %cond.store
+; AVX2-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    je .LBB7_4
+; AVX2-NEXT:  .LBB7_3: # %cond.store1
+; AVX2-NEXT:    vpextrw $1, %xmm0, 2(%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: truncstore_v2i64_v2i16:
 ; AVX512F:       # %bb.0:
@@ -2423,32 +2455,61 @@
 ; SSE4-NEXT:    pextrb $1, %xmm3, 1(%rdi)
 ; SSE4-NEXT:    retq
 ;
-; AVX-LABEL: truncstore_v2i64_v2i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vmovapd {{.*#+}} xmm3 = [255,255]
-; AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063]
-; AVX-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
-; AVX-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vmovmskpd %xmm1, %eax
-; AVX-NEXT:    xorl $3, %eax
-; AVX-NEXT:    testb $1, %al
-; AVX-NEXT:    jne .LBB8_1
-; AVX-NEXT:  # %bb.2: # %else
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    jne .LBB8_3
-; AVX-NEXT:  .LBB8_4: # %else2
-; AVX-NEXT:    retq
-; AVX-NEXT:  .LBB8_1: # %cond.store
-; AVX-NEXT:    vpextrb $0, %xmm0, (%rdi)
-; AVX-NEXT:    testb $2, %al
-; AVX-NEXT:    je .LBB8_4
-; AVX-NEXT:  .LBB8_3: # %cond.store1
-; AVX-NEXT:    vpextrb $1, %xmm0, 1(%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: truncstore_v2i64_v2i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovapd {{.*#+}} xmm3 = [255,255]
+; AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovmskpd %xmm1, %eax
+; AVX1-NEXT:    xorl $3, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    jne .LBB8_1
+; AVX1-NEXT:  # %bb.2: # %else
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    jne .LBB8_3
+; AVX1-NEXT:  .LBB8_4: # %else2
+; AVX1-NEXT:    retq
+; AVX1-NEXT:  .LBB8_1: # %cond.store
+; AVX1-NEXT:    vpextrb $0, %xmm0, (%rdi)
+; AVX1-NEXT:    testb $2, %al
+; AVX1-NEXT:    je .LBB8_4
+; AVX1-NEXT:  .LBB8_3: # %cond.store1
+; AVX1-NEXT:    vpextrb $1, %xmm0, 1(%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: truncstore_v2i64_v2i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm3 = [255,255]
+; AVX2-NEXT:    # xmm3 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm4, %xmm0, %xmm4
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063]
+; AVX2-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vmovmskpd %xmm1, %eax
+; AVX2-NEXT:    xorl $3, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    jne .LBB8_1
+; AVX2-NEXT:  # %bb.2: # %else
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    jne .LBB8_3
+; AVX2-NEXT:  .LBB8_4: # %else2
+; AVX2-NEXT:    retq
+; AVX2-NEXT:  .LBB8_1: # %cond.store
+; AVX2-NEXT:    vpextrb $0, %xmm0, (%rdi)
+; AVX2-NEXT:    testb $2, %al
+; AVX2-NEXT:    je .LBB8_4
+; AVX2-NEXT:  .LBB8_3: # %cond.store1
+; AVX2-NEXT:    vpextrb $1, %xmm0, 1(%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: truncstore_v2i64_v2i8:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -975,7 +975,8 @@
 ; AVX2-FALLBACK-LABEL: vec128_i64_signed_reg_reg:
 ; AVX2-FALLBACK:       # %bb.0:
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX2-FALLBACK-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; AVX2-FALLBACK-NEXT:    vpor %xmm3, %xmm2, %xmm3
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
@@ -992,25 +993,66 @@
 ; AVX2-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; AVX2-FALLBACK-NEXT:    retq
 ;
-; XOP-LABEL: vec128_i64_signed_reg_reg:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
-; XOP-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
-; XOP-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
-; XOP-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
-; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
-; XOP-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $1, %xmm1, %xmm2
-; XOP-NEXT:    vpsrlq $33, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $32, %xmm3, %xmm4
-; XOP-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
-; XOP-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpsllq $32, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
-; XOP-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
-; XOP-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
-; XOP-NEXT:    retq
+; XOP-FALLBACK-LABEL: vec128_i64_signed_reg_reg:
+; XOP-FALLBACK:       # %bb.0:
+; XOP-FALLBACK-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOP-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOP-FALLBACK-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOP-FALLBACK-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOP-FALLBACK-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOP-FALLBACK-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOP-FALLBACK-NEXT:    retq
+;
+; XOPAVX1-LABEL: vec128_i64_signed_reg_reg:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOPAVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX1-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX1-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX1-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: vec128_i64_signed_reg_reg:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; XOPAVX2-NEXT:    vpor %xmm3, %xmm2, %xmm3
+; XOPAVX2-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX2-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX2-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX2-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: vec128_i64_signed_reg_reg:
 ; AVX512F:       # %bb.0:
@@ -1194,11 +1236,12 @@
 ;
 ; AVX2-FALLBACK-LABEL: vec128_i64_unsigned_reg_reg:
 ; AVX2-FALLBACK:       # %bb.0:
-; AVX2-FALLBACK-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-FALLBACK-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-FALLBACK-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-FALLBACK-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm4
-; AVX2-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm5
+; AVX2-FALLBACK-NEXT:    vpbroadcastq {{.*#+}} xmm5 = [1,1]
+; AVX2-FALLBACK-NEXT:    vpor %xmm5, %xmm4, %xmm5
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm2
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm1
@@ -1215,25 +1258,66 @@
 ; AVX2-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; AVX2-FALLBACK-NEXT:    retq
 ;
-; XOP-LABEL: vec128_i64_unsigned_reg_reg:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vpcomgtuq %xmm1, %xmm0, %xmm2
-; XOP-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
-; XOP-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm4
-; XOP-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
-; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
-; XOP-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $1, %xmm1, %xmm2
-; XOP-NEXT:    vpsrlq $33, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $32, %xmm3, %xmm4
-; XOP-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
-; XOP-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpsllq $32, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
-; XOP-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
-; XOP-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
-; XOP-NEXT:    retq
+; XOP-FALLBACK-LABEL: vec128_i64_unsigned_reg_reg:
+; XOP-FALLBACK:       # %bb.0:
+; XOP-FALLBACK-NEXT:    vpcomgtuq %xmm1, %xmm0, %xmm2
+; XOP-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOP-FALLBACK-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOP-FALLBACK-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOP-FALLBACK-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOP-FALLBACK-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOP-FALLBACK-NEXT:    retq
+;
+; XOPAVX1-LABEL: vec128_i64_unsigned_reg_reg:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vpcomgtuq %xmm1, %xmm0, %xmm2
+; XOPAVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX1-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX1-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX1-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: vec128_i64_unsigned_reg_reg:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpcomgtuq %xmm1, %xmm0, %xmm2
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; XOPAVX2-NEXT:    vpor %xmm3, %xmm2, %xmm3
+; XOPAVX2-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX2-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX2-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX2-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: vec128_i64_unsigned_reg_reg:
 ; AVX512F:       # %bb.0:
@@ -1421,7 +1505,8 @@
 ; AVX2-FALLBACK:       # %bb.0:
 ; AVX2-FALLBACK-NEXT:    vmovdqa (%rdi), %xmm1
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX2-FALLBACK-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; AVX2-FALLBACK-NEXT:    vpor %xmm3, %xmm2, %xmm3
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
@@ -1438,26 +1523,69 @@
 ; AVX2-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; AVX2-FALLBACK-NEXT:    retq
 ;
-; XOP-LABEL: vec128_i64_signed_mem_reg:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vmovdqa (%rdi), %xmm1
-; XOP-NEXT:    vpcomgtq %xmm0, %xmm1, %xmm2
-; XOP-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
-; XOP-NEXT:    vpcomltq %xmm0, %xmm1, %xmm4
-; XOP-NEXT:    vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
-; XOP-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    vpsubq %xmm4, %xmm0, %xmm0
-; XOP-NEXT:    vpsrlq $1, %xmm0, %xmm2
-; XOP-NEXT:    vpsrlq $33, %xmm0, %xmm0
-; XOP-NEXT:    vpmuludq %xmm3, %xmm0, %xmm0
-; XOP-NEXT:    vpsrlq $32, %xmm3, %xmm4
-; XOP-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
-; XOP-NEXT:    vpaddq %xmm0, %xmm4, %xmm0
-; XOP-NEXT:    vpsllq $32, %xmm0, %xmm0
-; XOP-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
-; XOP-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
-; XOP-NEXT:    retq
+; XOP-FALLBACK-LABEL: vec128_i64_signed_mem_reg:
+; XOP-FALLBACK:       # %bb.0:
+; XOP-FALLBACK-NEXT:    vmovdqa (%rdi), %xmm1
+; XOP-FALLBACK-NEXT:    vpcomgtq %xmm0, %xmm1, %xmm2
+; XOP-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOP-FALLBACK-NEXT:    vpcomltq %xmm0, %xmm1, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; XOP-FALLBACK-NEXT:    vpsubq %xmm4, %xmm0, %xmm0
+; XOP-FALLBACK-NEXT:    vpsrlq $1, %xmm0, %xmm2
+; XOP-FALLBACK-NEXT:    vpsrlq $33, %xmm0, %xmm0
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm0, %xmm0
+; XOP-FALLBACK-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm4, %xmm0
+; XOP-FALLBACK-NEXT:    vpsllq $32, %xmm0, %xmm0
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOP-FALLBACK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOP-FALLBACK-NEXT:    retq
+;
+; XOPAVX1-LABEL: vec128_i64_signed_mem_reg:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vmovdqa (%rdi), %xmm1
+; XOPAVX1-NEXT:    vpcomgtq %xmm0, %xmm1, %xmm2
+; XOPAVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT:    vpcomltq %xmm0, %xmm1, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpsubq %xmm4, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpsrlq $1, %xmm0, %xmm2
+; XOPAVX1-NEXT:    vpsrlq $33, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm4, %xmm0
+; XOPAVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: vec128_i64_signed_mem_reg:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vmovdqa (%rdi), %xmm1
+; XOPAVX2-NEXT:    vpcomgtq %xmm0, %xmm1, %xmm2
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; XOPAVX2-NEXT:    vpor %xmm3, %xmm2, %xmm3
+; XOPAVX2-NEXT:    vpcomltq %xmm0, %xmm1, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpsubq %xmm4, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpsrlq $1, %xmm0, %xmm2
+; XOPAVX2-NEXT:    vpsrlq $33, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX2-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm4, %xmm0
+; XOPAVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: vec128_i64_signed_mem_reg:
 ; AVX512F:       # %bb.0:
@@ -1644,7 +1772,8 @@
 ; AVX2-FALLBACK:       # %bb.0:
 ; AVX2-FALLBACK-NEXT:    vmovdqa (%rdi), %xmm1
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX2-FALLBACK-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; AVX2-FALLBACK-NEXT:    vpor %xmm3, %xmm2, %xmm3
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
@@ -1661,26 +1790,69 @@
 ; AVX2-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; AVX2-FALLBACK-NEXT:    retq
 ;
-; XOP-LABEL: vec128_i64_signed_reg_mem:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vmovdqa (%rdi), %xmm1
-; XOP-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
-; XOP-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
-; XOP-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
-; XOP-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
-; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
-; XOP-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $1, %xmm1, %xmm2
-; XOP-NEXT:    vpsrlq $33, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $32, %xmm3, %xmm4
-; XOP-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
-; XOP-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpsllq $32, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
-; XOP-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
-; XOP-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
-; XOP-NEXT:    retq
+; XOP-FALLBACK-LABEL: vec128_i64_signed_reg_mem:
+; XOP-FALLBACK:       # %bb.0:
+; XOP-FALLBACK-NEXT:    vmovdqa (%rdi), %xmm1
+; XOP-FALLBACK-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOP-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOP-FALLBACK-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOP-FALLBACK-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOP-FALLBACK-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOP-FALLBACK-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOP-FALLBACK-NEXT:    retq
+;
+; XOPAVX1-LABEL: vec128_i64_signed_reg_mem:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vmovdqa (%rdi), %xmm1
+; XOPAVX1-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOPAVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX1-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX1-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX1-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: vec128_i64_signed_reg_mem:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vmovdqa (%rdi), %xmm1
+; XOPAVX2-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; XOPAVX2-NEXT:    vpor %xmm3, %xmm2, %xmm3
+; XOPAVX2-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX2-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX2-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX2-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: vec128_i64_signed_reg_mem:
 ; AVX512F:       # %bb.0:
@@ -1871,7 +2043,8 @@
 ; AVX2-FALLBACK-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX2-FALLBACK-NEXT:    vmovdqa (%rsi), %xmm1
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX2-FALLBACK-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; AVX2-FALLBACK-NEXT:    vpor %xmm3, %xmm2, %xmm3
 ; AVX2-FALLBACK-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
 ; AVX2-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
@@ -1888,27 +2061,72 @@
 ; AVX2-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; AVX2-FALLBACK-NEXT:    retq
 ;
-; XOP-LABEL: vec128_i64_signed_mem_mem:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vmovdqa (%rdi), %xmm0
-; XOP-NEXT:    vmovdqa (%rsi), %xmm1
-; XOP-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
-; XOP-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
-; XOP-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
-; XOP-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
-; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
-; XOP-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $1, %xmm1, %xmm2
-; XOP-NEXT:    vpsrlq $33, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq $32, %xmm3, %xmm4
-; XOP-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
-; XOP-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpsllq $32, %xmm1, %xmm1
-; XOP-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
-; XOP-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
-; XOP-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
-; XOP-NEXT:    retq
+; XOP-FALLBACK-LABEL: vec128_i64_signed_mem_mem:
+; XOP-FALLBACK:       # %bb.0:
+; XOP-FALLBACK-NEXT:    vmovdqa (%rdi), %xmm0
+; XOP-FALLBACK-NEXT:    vmovdqa (%rsi), %xmm1
+; XOP-FALLBACK-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOP-FALLBACK-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOP-FALLBACK-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOP-FALLBACK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOP-FALLBACK-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOP-FALLBACK-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOP-FALLBACK-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOP-FALLBACK-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOP-FALLBACK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOP-FALLBACK-NEXT:    retq
+;
+; XOPAVX1-LABEL: vec128_i64_signed_mem_mem:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vmovdqa (%rdi), %xmm0
+; XOPAVX1-NEXT:    vmovdqa (%rsi), %xmm1
+; XOPAVX1-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOPAVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX1-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX1-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX1-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX1-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: vec128_i64_signed_mem_mem:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vmovdqa (%rdi), %xmm0
+; XOPAVX2-NEXT:    vmovdqa (%rsi), %xmm1
+; XOPAVX2-NEXT:    vpcomgtq %xmm1, %xmm0, %xmm2
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [1,1]
+; XOPAVX2-NEXT:    vpor %xmm3, %xmm2, %xmm3
+; XOPAVX2-NEXT:    vpcomltq %xmm1, %xmm0, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
+; XOPAVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $1, %xmm1, %xmm2
+; XOPAVX2-NEXT:    vpsrlq $33, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq $32, %xmm3, %xmm4
+; XOPAVX2-NEXT:    vpmuludq %xmm4, %xmm2, %xmm4
+; XOPAVX2-NEXT:    vpaddq %xmm1, %xmm4, %xmm1
+; XOPAVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; XOPAVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: vec128_i64_signed_mem_mem:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -120,15 +120,25 @@
 ; SSE-NEXT:    paddq %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: mul_v2i64c:
-; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [117,117]
-; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
-; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
-; AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    retq
+; AVX2-LABEL: mul_v2i64c:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [117,117]
+; AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
+; AVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: mul_v2i64c:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [117,117]
+; AVX512-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
+; AVX512-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; AVX512-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpsllq $32, %xmm0, %xmm0
+; AVX512-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; AVX512-NEXT:    retq
 entry:
   %A = mul <2 x i64> %i, < i64 117, i64 117 >
   ret <2 x i64> %A
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -1207,8 +1207,11 @@
 ; AVX2-LABEL: v2i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vblendvpd %xmm2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807]
+; AVX2-NEXT:    # xmm3 = mem[0,0]
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    # xmm4 = mem[0,0]
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm3, %xmm4, %xmm3
 ; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -657,12 +657,15 @@
 ;
 ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_min:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovapd {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573]
-; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [18446744073709551573,18446744073709551573]
+; AVX2-NEXT:    # xmm1 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775765,9223372036854775765]
 ; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: unsigned_sat_constant_v2i64_using_min:
@@ -726,11 +729,12 @@
 ;
 ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
 ; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -793,11 +797,12 @@
 ;
 ; AVX2-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
 ; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -1206,8 +1211,10 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
-; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
-; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm3, %xmm0, %xmm3
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [9223372036854775807,9223372036854775807]
+; AVX2-NEXT:    vpxor %xmm4, %xmm1, %xmm4
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
 ; AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
@@ -1275,7 +1282,7 @@
 ;
 ; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
@@ -1344,8 +1351,10 @@
 ; AVX2-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807]
+; AVX2-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -1304,8 +1304,11 @@
 ; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775807,9223372036854775807]
+; AVX2-NEXT:    # xmm2 = mem[0,0]
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    # xmm3 = mem[0,0]
+; AVX2-NEXT:    vblendvpd %xmm1, %xmm2, %xmm3, %xmm2
 ; AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -906,7 +906,7 @@
 ;
 ; AVX2-LABEL: v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -240,15 +240,17 @@
 ;
 ; CHECK-AVX2-LABEL: t3_wide:
 ; CHECK-AVX2:       # %bb.0:
-; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [12297829382473034411,12297829382473034411]
-; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
-; CHECK-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm3
-; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
-; CHECK-AVX2-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2863311530,2863311530]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
+; CHECK-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm2
+; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [12297829382473034411,12297829382473034411]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; CHECK-AVX2-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm0, %xmm0
 ; CHECK-AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; CHECK-AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
-; CHECK-AVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
-; CHECK-AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; CHECK-AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -817,7 +817,7 @@
 ;
 ; AVX2-LABEL: v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
--- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -342,7 +342,7 @@
 ;
 ; AVX2-LABEL: ge_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
@@ -516,7 +516,7 @@
 ;
 ; AVX2-LABEL: gt_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -764,7 +764,7 @@
 ;
 ; AVX2-LABEL: le_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
@@ -939,7 +939,7 @@
 ;
 ; AVX2-LABEL: lt_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -344,15 +344,27 @@
 ; SSE-NEXT:    orpd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; VEX-LABEL: fptoui_2f64_to_4i32:
-; VEX:       # %bb.0:
-; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm1
-; VEX-NEXT:    vpsrad $31, %xmm1, %xmm2
-; VEX-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
-; VEX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
-; VEX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
-; VEX-NEXT:    retq
+; AVX1-LABEL: fptoui_2f64_to_4i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm1
+; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
+; AVX1-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX1-NEXT:    vandpd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vorpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: fptoui_2f64_to_4i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [-2.147483648E+9,-2.147483648E+9]
+; AVX2-NEXT:    # xmm1 = mem[0,0]
+; AVX2-NEXT:    vaddpd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vcvttpd2dq %xmm1, %xmm1
+; AVX2-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
+; AVX2-NEXT:    vandpd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vorpd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: fptoui_2f64_to_4i32:
 ; AVX512F:       # %bb.0:
@@ -396,15 +408,27 @@
 ; SSE-NEXT:    orpd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; VEX-LABEL: fptoui_2f64_to_2i32:
-; VEX:       # %bb.0:
-; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm1
-; VEX-NEXT:    vpsrad $31, %xmm1, %xmm2
-; VEX-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
-; VEX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
-; VEX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
-; VEX-NEXT:    retq
+; AVX1-LABEL: fptoui_2f64_to_2i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm1
+; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
+; AVX1-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX1-NEXT:    vandpd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vorpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: fptoui_2f64_to_2i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [-2.147483648E+9,-2.147483648E+9]
+; AVX2-NEXT:    # xmm1 = mem[0,0]
+; AVX2-NEXT:    vaddpd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vcvttpd2dq %xmm1, %xmm1
+; AVX2-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
+; AVX2-NEXT:    vandpd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vorpd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: fptoui_2f64_to_2i32:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -52,14 +52,23 @@
 ; SSE41-NEXT:    cvtpd2ps %xmm0, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; VEX-LABEL: uitofp_2i32_to_2f32:
-; VEX:       # %bb.0:
-; VEX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; VEX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
-; VEX-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; VEX-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
-; VEX-NEXT:    vcvtpd2ps %xmm0, %xmm0
-; VEX-NEXT:    retq
+; AVX1-LABEL: uitofp_2i32_to_2f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: uitofp_2i32_to_2f32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_2i32_to_2f32:
 ; AVX512F:       # %bb.0:
@@ -604,10 +613,14 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200]
+; AVX2-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
-; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072]
+; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX2-NEXT:    # xmm2 = mem[0,0]
+; AVX2-NEXT:    vsubpd %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -667,13 +680,21 @@
 ; SSE41-NEXT:    subpd %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; VEX-LABEL: uitofp_2i32_to_2f64:
-; VEX:       # %bb.0:
-; VEX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; VEX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
-; VEX-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; VEX-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
-; VEX-NEXT:    retq
+; AVX1-LABEL: uitofp_2i32_to_2f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: uitofp_2i32_to_2f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_2i32_to_2f64:
 ; AVX512F:       # %bb.0:
@@ -1908,23 +1929,42 @@
 ; SSE41-NEXT:    movaps %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; VEX-LABEL: uitofp_2i64_to_4f32:
-; VEX:       # %bb.0:
-; VEX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; VEX-NEXT:    vpsrlq $1, %xmm0, %xmm2
-; VEX-NEXT:    vpor %xmm1, %xmm2, %xmm1
-; VEX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
-; VEX-NEXT:    vpextrq $1, %xmm1, %rax
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
-; VEX-NEXT:    vmovq %xmm1, %rax
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
-; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
-; VEX-NEXT:    vaddps %xmm1, %xmm1, %xmm2
-; VEX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; VEX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
-; VEX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; VEX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; VEX-NEXT:    retq
+; AVX1-LABEL: uitofp_2i64_to_4f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm2
+; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT:    vmovq %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
+; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: uitofp_2i64_to_4f32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpsrlq $1, %xmm0, %xmm2
+; AVX2-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX2-NEXT:    vmovq %xmm1, %rax
+; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
+; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
+; AVX2-NEXT:    vaddps %xmm1, %xmm1, %xmm2
+; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_2i64_to_4f32:
 ; AVX512F:       # %bb.0:
@@ -2023,24 +2063,44 @@
 ; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm3[0],zero
 ; SSE41-NEXT:    retq
 ;
-; VEX-LABEL: uitofp_2i64_to_2f32:
-; VEX:       # %bb.0:
-; VEX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; VEX-NEXT:    vpsrlq $1, %xmm0, %xmm2
-; VEX-NEXT:    vpor %xmm1, %xmm2, %xmm1
-; VEX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
-; VEX-NEXT:    vpextrq $1, %xmm1, %rax
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
-; VEX-NEXT:    vmovq %xmm1, %rax
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
-; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
-; VEX-NEXT:    vaddps %xmm1, %xmm1, %xmm2
-; VEX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; VEX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
-; VEX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; VEX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; VEX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; VEX-NEXT:    retq
+; AVX1-LABEL: uitofp_2i64_to_2f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm2
+; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT:    vmovq %xmm1, %rax
+; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
+; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: uitofp_2i64_to_2f32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpsrlq $1, %xmm0, %xmm2
+; AVX2-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX2-NEXT:    vmovq %xmm1, %rax
+; AVX2-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
+; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
+; AVX2-NEXT:    vaddps %xmm1, %xmm1, %xmm2
+; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_2i64_to_2f32:
 ; AVX512F:       # %bb.0:
@@ -3276,10 +3336,14 @@
 ; AVX2-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [4841369599423283200,4841369599423283200]
+; AVX2-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
-; AVX2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [4985484787499139072,4985484787499139072]
+; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm2 = [1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX2-NEXT:    # xmm2 = mem[0,0]
+; AVX2-NEXT:    vsubpd %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -3343,13 +3407,21 @@
 ; SSE41-NEXT:    subpd %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; VEX-LABEL: uitofp_load_2i32_to_2f64:
-; VEX:       # %bb.0:
-; VEX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; VEX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
-; VEX-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; VEX-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
-; VEX-NEXT:    retq
+; AVX1-LABEL: uitofp_load_2i32_to_2f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: uitofp_load_2i32_to_2f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_load_2i32_to_2f64:
 ; AVX512F:       # %bb.0:
@@ -5658,15 +5730,17 @@
 ;
 ; AVX2-LABEL: PR43609:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
 ; AVX2-NEXT:    vpor %xmm4, %xmm3, %xmm3
 ; AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
 ; AVX2-NEXT:    vpor %xmm5, %xmm0, %xmm0
-; AVX2-NEXT:    vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX2-NEXT:    # xmm6 = mem[0,0]
 ; AVX2-NEXT:    vsubpd %xmm6, %xmm0, %xmm0
 ; AVX2-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
@@ -5675,7 +5749,8 @@
 ; AVX2-NEXT:    vpor %xmm5, %xmm1, %xmm1
 ; AVX2-NEXT:    vsubpd %xmm6, %xmm1, %xmm1
 ; AVX2-NEXT:    vaddpd %xmm1, %xmm2, %xmm1
-; AVX2-NEXT:    vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
+; AVX2-NEXT:    # xmm2 = mem[0,0]
 ; AVX2-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vaddpd %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vmovupd %xmm0, (%rdi)
diff --git a/llvm/test/CodeGen/X86/vec_minmax_uint.ll b/llvm/test/CodeGen/X86/vec_minmax_uint.ll
--- a/llvm/test/CodeGen/X86/vec_minmax_uint.ll
+++ b/llvm/test/CodeGen/X86/vec_minmax_uint.ll
@@ -71,7 +71,7 @@
 ;
 ; AVX2-LABEL: max_gt_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -477,7 +477,7 @@
 ;
 ; AVX2-LABEL: max_ge_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -882,7 +882,7 @@
 ;
 ; AVX2-LABEL: min_lt_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -1290,7 +1290,7 @@
 ;
 ; AVX2-LABEL: min_le_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -855,16 +855,27 @@
 ; SSE-NEXT:    movdqa %xmm1, (%rdi)
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: uaddo_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm3
-; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm0
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    vmovdqa %xmm1, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: uaddo_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: uaddo_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: uaddo_v2i64:
 ; AVX512:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -902,16 +902,27 @@
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: usubo_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm3
-; AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm0
-; AVX-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm0
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    vmovdqa %xmm1, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: usubo_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: usubo_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: usubo_v2i64:
 ; AVX512:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -470,10 +470,21 @@
 ; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; GFNISSE-NEXT:    retq
 ;
-; GFNIAVX-LABEL: test_bitreverse_v16i8:
-; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT:    retq
+; GFNIAVX1-LABEL: test_bitreverse_v16i8:
+; GFNIAVX1:       # %bb.0:
+; GFNIAVX1-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT:    retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_v16i8:
+; GFNIAVX2:       # %bb.0:
+; GFNIAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT:    vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT:    retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_v16i8:
+; GFNIAVX512:       # %bb.0:
+; GFNIAVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX512-NEXT:    retq
   %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
   ret <16 x i8> %b
 }
@@ -549,11 +560,24 @@
 ; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; GFNISSE-NEXT:    retq
 ;
-; GFNIAVX-LABEL: test_bitreverse_v8i16:
-; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT:    retq
+; GFNIAVX1-LABEL: test_bitreverse_v8i16:
+; GFNIAVX1:       # %bb.0:
+; GFNIAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; GFNIAVX1-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT:    retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_v8i16:
+; GFNIAVX2:       # %bb.0:
+; GFNIAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; GFNIAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT:    vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT:    retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_v8i16:
+; GFNIAVX512:       # %bb.0:
+; GFNIAVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; GFNIAVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX512-NEXT:    retq
   %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
   ret <8 x i16> %b
 }
@@ -634,11 +658,24 @@
 ; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; GFNISSE-NEXT:    retq
 ;
-; GFNIAVX-LABEL: test_bitreverse_v4i32:
-; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT:    retq
+; GFNIAVX1-LABEL: test_bitreverse_v4i32:
+; GFNIAVX1:       # %bb.0:
+; GFNIAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; GFNIAVX1-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT:    retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_v4i32:
+; GFNIAVX2:       # %bb.0:
+; GFNIAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; GFNIAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT:    vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT:    retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_v4i32:
+; GFNIAVX512:       # %bb.0:
+; GFNIAVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; GFNIAVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX512-NEXT:    retq
   %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
   ret <4 x i32> %b
 }
@@ -721,11 +758,24 @@
 ; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; GFNISSE-NEXT:    retq
 ;
-; GFNIAVX-LABEL: test_bitreverse_v2i64:
-; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
-; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT:    retq
+; GFNIAVX1-LABEL: test_bitreverse_v2i64:
+; GFNIAVX1:       # %bb.0:
+; GFNIAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; GFNIAVX1-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1-NEXT:    retq
+;
+; GFNIAVX2-LABEL: test_bitreverse_v2i64:
+; GFNIAVX2:       # %bb.0:
+; GFNIAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; GFNIAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; GFNIAVX2-NEXT:    vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT:    retq
+;
+; GFNIAVX512-LABEL: test_bitreverse_v2i64:
+; GFNIAVX512:       # %bb.0:
+; GFNIAVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; GFNIAVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX512-NEXT:    retq
   %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
   ret <2 x i64> %b
 }
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1034,16 +1034,27 @@
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: bool_reduction_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    cmpb $3, %al
-; AVX-NEXT:    sete %al
-; AVX-NEXT:    retq
+; AVX1-LABEL: bool_reduction_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovmskpd %xmm0, %eax
+; AVX1-NEXT:    cmpb $3, %al
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: bool_reduction_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovmskpd %xmm0, %eax
+; AVX2-NEXT:    cmpb $3, %al
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: bool_reduction_v2i64:
 ; AVX512:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -989,16 +989,27 @@
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: bool_reduction_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
-; AVX-NEXT:    setne %al
-; AVX-NEXT:    retq
+; AVX1-LABEL: bool_reduction_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovmskpd %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: bool_reduction_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovmskpd %xmm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: bool_reduction_v2i64:
 ; AVX512:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -84,7 +84,7 @@
 ;
 ; AVX2-LABEL: var_funnnel_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpsrlq $1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsrlvq %xmm4, %xmm1, %xmm1
@@ -167,7 +167,7 @@
 ;
 ; XOPAVX2-LABEL: var_funnnel_v2i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX2-NEXT:    vpsrlq $1, %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpsrlvq %xmm4, %xmm1, %xmm1
@@ -952,16 +952,27 @@
 ; SSE-NEXT:    por %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
-; AVX-NEXT:    vpandn %xmm3, %xmm2, %xmm4
-; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
-; AVX-NEXT:    vpand %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; AVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm4
+; AVX1-NEXT:    vpsrlq $1, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm4
+; AVX2-NEXT:    vpsrlq $1, %xmm1, %xmm1
+; AVX2-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v2i64:
 ; AVX512F:       # %bb.0:
@@ -1023,16 +1034,27 @@
 ; AVX512VLVBMI2-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0
 ; AVX512VLVBMI2-NEXT:    retq
 ;
-; XOP-LABEL: splatvar_funnnel_v2i64:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
-; XOP-NEXT:    vpandn %xmm3, %xmm2, %xmm4
-; XOP-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
-; XOP-NEXT:    vpand %xmm3, %xmm2, %xmm2
-; XOP-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
-; XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    retq
+; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; XOPAVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT:    vpsrlq $1, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT:    vpsrlq $1, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -761,7 +761,7 @@
 ;
 ; AVX2-LABEL: splatvar_funnnel_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpsrlq $1, %ymm1, %ymm1
 ; AVX2-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
@@ -849,7 +849,7 @@
 ;
 ; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX2-NEXT:    vpsrlq $1, %ymm1, %ymm1
 ; XOPAVX2-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -87,7 +87,7 @@
 ;
 ; AVX2-LABEL: var_funnnel_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpsllvq %xmm3, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
@@ -731,17 +731,29 @@
 ; SSE-NEXT:    por %xmm4, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
-; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm3
-; AVX-NEXT:    vpsllq %xmm3, %xmm0, %xmm3
-; AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
-; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vpsllq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT:    vpsllq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX2-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v2i64:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -587,7 +587,7 @@
 ;
 ; AVX2-LABEL: splatvar_funnnel_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpsllq %xmm3, %ymm0, %ymm3
 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -84,7 +84,7 @@
 ;
 ; AVX2-LABEL: var_funnnel_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpsrlvq %xmm4, %xmm1, %xmm1
 ; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
@@ -168,7 +168,7 @@
 ;
 ; XOPAVX2-LABEL: var_funnnel_v2i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; XOPAVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; XOPAVX2-NEXT:    vpsrlvq %xmm4, %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
@@ -1041,16 +1041,27 @@
 ; SSE-NEXT:    por %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
-; AVX-NEXT:    vpand %xmm3, %xmm2, %xmm4
-; AVX-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
-; AVX-NEXT:    vpandn %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vpsllq $1, %xmm0, %xmm0
-; AVX-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
+; AVX1-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
+; AVX2-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq $1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v2i64:
 ; AVX512F:       # %bb.0:
@@ -1113,16 +1124,27 @@
 ; AVX512VLVBMI2-NEXT:    vmovdqa %xmm1, %xmm0
 ; AVX512VLVBMI2-NEXT:    retq
 ;
-; XOP-LABEL: splatvar_funnnel_v2i64:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
-; XOP-NEXT:    vpand %xmm3, %xmm2, %xmm4
-; XOP-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
-; XOP-NEXT:    vpandn %xmm3, %xmm2, %xmm2
-; XOP-NEXT:    vpsllq $1, %xmm0, %xmm0
-; XOP-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
-; XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    retq
+; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; XOPAVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT:    vpsllq $1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -792,7 +792,7 @@
 ;
 ; AVX2-LABEL: splatvar_funnnel_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
 ; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
@@ -881,7 +881,7 @@
 ;
 ; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
 ; XOPAVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; XOPAVX2-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
 ; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -87,7 +87,7 @@
 ;
 ; AVX2-LABEL: var_funnnel_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpsrlvq %xmm3, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
@@ -753,17 +753,29 @@
 ; SSE-NEXT:    por %xmm4, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
-; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm3
-; AVX-NEXT:    vpsrlq %xmm3, %xmm0, %xmm3
-; AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
-; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vpsrlq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT:    vpsrlq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX2-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v2i64:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -613,7 +613,7 @@
 ;
 ; AVX2-LABEL: splatvar_funnnel_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpsrlq %xmm3, %ymm0, %ymm3
 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -1490,14 +1490,15 @@
 ;
 ; X64-AVX2-LABEL: mul_v2i64_neg_15_63:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553]
-; X64-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
-; X64-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm3
-; X64-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
-; X64-AVX2-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295]
+; X64-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
+; X64-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm2
+; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744073709551601,18446744073709551553]
+; X64-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpmuludq %xmm3, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; X64-AVX2-NEXT:    retq
 ;
 ; X64-AVX512DQ-LABEL: mul_v2i64_neg_15_63:
@@ -1551,14 +1552,15 @@
 ;
 ; X64-AVX2-LABEL: mul_v2i64_neg_17_65:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551]
-; X64-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
-; X64-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm3
-; X64-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
-; X64-AVX2-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295]
+; X64-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
+; X64-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm2
+; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [18446744073709551599,18446744073709551551]
+; X64-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpsllq $32, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpmuludq %xmm3, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
-; X64-AVX2-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
 ; X64-AVX2-NEXT:    retq
 ;
 ; X64-AVX512DQ-LABEL: mul_v2i64_neg_17_65:
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
--- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
@@ -17262,7 +17262,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64:
@@ -17447,7 +17448,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3,3]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [3,3]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -17634,7 +17635,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [3,3]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64:
@@ -17819,7 +17821,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,4]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4,4]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -18006,7 +18008,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4,4]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v2i64:
@@ -18191,7 +18194,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [5,5]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [5,5]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -18378,7 +18381,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [5,5]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64:
@@ -18563,7 +18567,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [6,6]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [6,6]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -18750,7 +18754,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [6,6]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64:
@@ -18935,7 +18940,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [7,7]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [7,7]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -19122,7 +19127,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [7,7]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64:
@@ -19307,7 +19313,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [8,8]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [8,8]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -19494,7 +19500,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [8,8]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64:
@@ -19679,7 +19686,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9,9]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9,9]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -19866,7 +19873,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9,9]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64:
@@ -20051,7 +20059,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [10,10]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [10,10]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -20238,7 +20246,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [10,10]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64:
@@ -20423,7 +20432,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [11,11]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [11,11]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -20610,7 +20619,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [11,11]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64:
@@ -20795,7 +20805,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [12,12]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [12,12]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -20982,7 +20992,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [12,12]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64:
@@ -21167,7 +21178,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [13,13]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [13,13]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -21354,7 +21365,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [13,13]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64:
@@ -21539,7 +21551,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [14,14]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [14,14]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -21726,7 +21738,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [14,14]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64:
@@ -21911,7 +21924,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [15,15]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -22098,7 +22111,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [15,15]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64:
@@ -22283,7 +22297,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,16]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [16,16]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -22470,7 +22484,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [16,16]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64:
@@ -22655,7 +22670,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [17,17]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [17,17]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -22842,7 +22857,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [17,17]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64:
@@ -23027,7 +23043,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [18,18]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18,18]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -23214,7 +23230,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18,18]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64:
@@ -23399,7 +23416,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [19,19]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [19,19]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -23586,7 +23603,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [19,19]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64:
@@ -23771,7 +23789,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [20,20]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [20,20]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -23958,7 +23976,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [20,20]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64:
@@ -24143,7 +24162,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [21,21]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [21,21]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -24330,7 +24349,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [21,21]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64:
@@ -24515,7 +24535,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [22,22]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [22,22]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -24702,7 +24722,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [22,22]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64:
@@ -24887,7 +24908,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [23,23]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [23,23]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -25074,7 +25095,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [23,23]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64:
@@ -25259,7 +25281,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [24,24]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [24,24]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -25446,7 +25468,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [24,24]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64:
@@ -25631,7 +25654,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [25,25]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [25,25]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -25818,7 +25841,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [25,25]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64:
@@ -26003,7 +26027,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [26,26]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [26,26]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -26190,7 +26214,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [26,26]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64:
@@ -26375,7 +26400,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [27,27]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [27,27]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -26562,7 +26587,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [27,27]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64:
@@ -26747,7 +26773,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [28,28]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [28,28]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -26934,7 +26960,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [28,28]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64:
@@ -27119,7 +27146,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [29,29]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [29,29]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -27306,7 +27333,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [29,29]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64:
@@ -27491,7 +27519,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [30,30]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [30,30]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -27678,7 +27706,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [30,30]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64:
@@ -27863,7 +27892,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [31,31]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [31,31]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -28050,7 +28079,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [31,31]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64:
@@ -28235,7 +28265,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32,32]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32,32]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -28422,7 +28452,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32,32]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64:
@@ -28607,7 +28638,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [33,33]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [33,33]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -28794,7 +28825,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [33,33]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64:
@@ -28979,7 +29011,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [34,34]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [34,34]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -29166,7 +29198,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [34,34]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64:
@@ -29351,7 +29384,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [35,35]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [35,35]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -29538,7 +29571,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [35,35]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64:
@@ -29723,7 +29757,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [36,36]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [36,36]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -29910,7 +29944,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [36,36]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64:
@@ -30095,7 +30130,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [37,37]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [37,37]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -30282,7 +30317,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [37,37]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64:
@@ -30467,7 +30503,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [38,38]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [38,38]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -30654,7 +30690,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [38,38]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64:
@@ -30839,7 +30876,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [39,39]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [39,39]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -31026,7 +31063,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [39,39]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64:
@@ -31211,7 +31249,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [40,40]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [40,40]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -31398,7 +31436,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [40,40]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64:
@@ -31583,7 +31622,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [41,41]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [41,41]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -31770,7 +31809,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [41,41]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64:
@@ -31955,7 +31995,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -32142,7 +32182,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [42,42]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64:
@@ -32327,7 +32368,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [43,43]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [43,43]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -32514,7 +32555,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [43,43]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64:
@@ -32699,7 +32741,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [44,44]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [44,44]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -32886,7 +32928,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [44,44]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64:
@@ -33071,7 +33114,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [45,45]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [45,45]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -33258,7 +33301,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [45,45]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64:
@@ -33443,7 +33487,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [46,46]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [46,46]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -33630,7 +33674,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [46,46]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64:
@@ -33815,7 +33860,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [47,47]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [47,47]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -34002,7 +34047,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [47,47]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64:
@@ -34187,7 +34233,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [48,48]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [48,48]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -34374,7 +34420,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [48,48]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64:
@@ -34559,7 +34606,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [49,49]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [49,49]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -34746,7 +34793,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [49,49]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64:
@@ -34931,7 +34979,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [50,50]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [50,50]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -35118,7 +35166,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [50,50]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64:
@@ -35303,7 +35352,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [51,51]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [51,51]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -35490,7 +35539,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [51,51]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64:
@@ -35675,7 +35725,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [52,52]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [52,52]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -35862,7 +35912,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [52,52]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64:
@@ -36047,7 +36098,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [53,53]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [53,53]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -36234,7 +36285,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [53,53]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64:
@@ -36419,7 +36471,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [54,54]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [54,54]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -36606,7 +36658,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [54,54]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64:
@@ -36791,7 +36844,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [55,55]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [55,55]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -36978,7 +37031,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [55,55]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64:
@@ -37163,7 +37217,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [56,56]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [56,56]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -37350,7 +37404,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [56,56]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64:
@@ -37535,7 +37590,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [57,57]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [57,57]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -37722,7 +37777,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [57,57]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64:
@@ -37907,7 +37963,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [58,58]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [58,58]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -38094,7 +38150,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [58,58]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64:
@@ -38279,7 +38336,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [59,59]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [59,59]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -38466,7 +38523,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [59,59]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64:
@@ -38651,7 +38709,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [60,60]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [60,60]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -38838,7 +38896,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [60,60]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64:
@@ -39023,7 +39082,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [61,61]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [61,61]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -39210,7 +39269,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [61,61]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64:
@@ -39395,7 +39455,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [62,62]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [62,62]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -39582,7 +39642,8 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [62,62]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64:
@@ -39767,7 +39828,7 @@
 ; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [63,63]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [63,63]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -844,7 +844,8 @@
 ;
 ; AVX2-LABEL: trunc_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
+; AVX2-NEXT:    vptest %xmm1, %xmm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    retq
 ;
@@ -1057,7 +1058,8 @@
 ; AVX2-LABEL: PR44781:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX2-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [64424509455,64424509455]
+; AVX2-NEXT:    vptest %xmm1, %xmm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -63,16 +63,27 @@
 ; SSE42-NEXT:    movq %xmm2, %rax
 ; SSE42-NEXT:    retq
 ;
-; AVX-LABEL: test_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm3
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm2
-; AVX-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    retq
+; AVX1-LABEL: test_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_v2i64:
 ; AVX512BW:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -63,16 +63,27 @@
 ; SSE42-NEXT:    movq %xmm2, %rax
 ; SSE42-NEXT:    retq
 ;
-; AVX-LABEL: test_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm3
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm2
-; AVX-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    retq
+; AVX1-LABEL: test_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_v2i64:
 ; AVX512BW:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -73,7 +73,7 @@
 ;
 ; AVX2-LABEL: var_rotate_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [64,64]
 ; AVX2-NEXT:    vpsubq %xmm1, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsllvq %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
@@ -720,14 +720,23 @@
 ; SSE-NEXT:    por %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_rotate_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [64,64]
-; AVX-NEXT:    vpsubq %xmm1, %xmm2, %xmm2
-; AVX-NEXT:    vpsllq %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpsrlq %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_rotate_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX1-NEXT:    vpsubq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpsrlq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_rotate_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT:    vpsubq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpsrlq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v2i64:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -578,7 +578,7 @@
 ; AVX2-LABEL: splatvar_rotate_v4i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [64,64]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [64,64]
 ; AVX2-NEXT:    vpsubq %xmm1, %xmm3, %xmm1
 ; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -66,7 +66,7 @@
 ;
 ; AVX2-LABEL: var_shift_v2i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpsrlvq %xmm1, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
@@ -632,14 +632,23 @@
 ; SSE-NEXT:    psubq %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_shift_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
-; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v2i64:
 ; XOPAVX1:       # %bb.0:
@@ -933,15 +942,26 @@
 ; SSE-NEXT:    psubq %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
-; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
 ; XOPAVX1:       # %bb.0:
@@ -954,7 +974,8 @@
 ;
 ; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; XOPAVX2-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
@@ -1601,10 +1622,16 @@
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; AVX2-NEXT:    retq
 ;
-; XOP-LABEL: splatconstant_shift_v2i64:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; XOP-NEXT:    retq
+; XOPAVX1-LABEL: splatconstant_shift_v2i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v2i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744073709551609,18446744073709551609]
+; XOPAVX2-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatconstant_shift_v2i64:
 ; AVX512:       # %bb.0:
@@ -1764,15 +1791,25 @@
 ; SSE-NEXT:    psubq %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: PR52719:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovd %edi, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
-; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: PR52719:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovd %edi, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: PR52719:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovd %edi, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: PR52719:
 ; XOPAVX1:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1022,7 +1022,8 @@
 ;
 ; AVX2-LABEL: splatvar_modulo_shift_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpsrlq %xmm1, %ymm2, %ymm2
 ; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
@@ -1044,7 +1045,8 @@
 ;
 ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
 ; XOPAVX2-NEXT:    vpsrlq %xmm1, %ymm2, %ymm2
 ; XOPAVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -775,17 +775,31 @@
 ; SSE-NEXT:    psrlq %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
 ;
-; XOP-LABEL: splatvar_modulo_shift_v2i64:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    retq
+; AVX2-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_modulo_shift_v2i64:
 ; AVX512:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -832,7 +832,8 @@
 ;
 ; AVX2-LABEL: splatvar_modulo_shift_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -847,7 +848,8 @@
 ;
 ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -682,17 +682,31 @@
 ; SSE-NEXT:    psllq %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
 ;
-; XOP-LABEL: splatvar_modulo_shift_v2i64:
-; XOP:       # %bb.0:
-; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    retq
+; AVX2-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_modulo_shift_v2i64:
 ; AVX512:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -757,7 +757,8 @@
 ;
 ; AVX2-LABEL: splatvar_modulo_shift_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -772,7 +773,8 @@
 ;
 ; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
 ; XOPAVX2:       # %bb.0:
-; XOPAVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
--- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
@@ -107,16 +107,27 @@
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_packus_v2i64_v2i32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_packus_v2i64_v2i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_packus_v2i64_v2i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_packus_v2i64_v2i32:
 ; AVX512F:       # %bb.0:
@@ -257,17 +268,29 @@
 ; SSE41-NEXT:    movq %xmm0, (%rdi)
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_packus_v2i64_v2i32_store:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    vmovq %xmm0, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_packus_v2i64_v2i32_store:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vmovq %xmm0, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_packus_v2i64_v2i32_store:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vmovq %xmm0, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_packus_v2i64_v2i32_store:
 ; AVX512F:       # %bb.0:
@@ -1115,7 +1138,7 @@
 ;
 ; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16:
 ; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
@@ -1127,7 +1150,7 @@
 ;
 ; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16:
 ; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-FAST-NEXT:    vpxor %xmm1, %xmm1, %xmm1
@@ -1293,7 +1316,7 @@
 ;
 ; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16_store:
 ; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
@@ -1306,7 +1329,7 @@
 ;
 ; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16_store:
 ; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-FAST-NEXT:    vpxor %xmm1, %xmm1, %xmm1
@@ -2801,16 +2824,27 @@
 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_packus_v2i64_v2i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_packus_v2i64_v2i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_packus_v2i64_v2i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [255,255]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_packus_v2i64_v2i8:
 ; AVX512F:       # %bb.0:
@@ -2955,17 +2989,29 @@
 ; SSE41-NEXT:    pextrw $0, %xmm1, (%rdi)
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_packus_v2i64_v2i8_store:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpextrw $0, %xmm0, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_packus_v2i64_v2i8_store:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_packus_v2i64_v2i8_store:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [255,255]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_packus_v2i64_v2i8_store:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
--- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -113,16 +113,27 @@
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_ssat_v2i64_v2i32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_ssat_v2i64_v2i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_ssat_v2i64_v2i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_ssat_v2i64_v2i32:
 ; AVX512F:       # %bb.0:
@@ -259,17 +270,29 @@
 ; SSE41-NEXT:    movq %xmm0, (%rdi)
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_ssat_v2i64_v2i32_store:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_ssat_v2i64_v2i32_store:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_ssat_v2i64_v2i32_store:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store:
 ; AVX512F:       # %bb.0:
@@ -1139,10 +1162,10 @@
 ;
 ; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16:
 ; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32767,32767]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1151,10 +1174,10 @@
 ;
 ; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16:
 ; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32767,32767]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
@@ -1311,10 +1334,10 @@
 ;
 ; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16_store:
 ; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32767,32767]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1324,10 +1347,10 @@
 ;
 ; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16_store:
 ; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [32767,32767]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
@@ -2566,16 +2589,27 @@
 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_ssat_v2i64_v2i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_ssat_v2i64_v2i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_ssat_v2i64_v2i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [127,127]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8:
 ; AVX512F:       # %bb.0:
@@ -2715,17 +2749,29 @@
 ; SSE41-NEXT:    pextrw $0, %xmm1, (%rdi)
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_ssat_v2i64_v2i8_store:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpextrw $0, %xmm0, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_ssat_v2i64_v2i8_store:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127]
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_ssat_v2i64_v2i8_store:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [127,127]
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
--- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -75,15 +75,27 @@
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_usat_v2i64_v2i32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
-; AVX-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_usat_v2i64_v2i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_usat_v2i64_v2i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX2-NEXT:    # xmm1 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
+; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_usat_v2i64_v2i32:
 ; AVX512F:       # %bb.0:
@@ -180,16 +192,29 @@
 ; SSE41-NEXT:    movq %xmm0, (%rdi)
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_usat_v2i64_v2i32_store:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
-; AVX-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_usat_v2i64_v2i32_store:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_usat_v2i64_v2i32_store:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX2-NEXT:    # xmm1 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
+; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store:
 ; AVX512F:       # %bb.0:
@@ -793,9 +818,11 @@
 ;
 ; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16:
 ; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vmovapd {{.*#+}} xmm1 = [65535,65535]
-; AVX2-SLOW-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-SLOW-NEXT:    vmovddup {{.*#+}} xmm1 = [65535,65535]
+; AVX2-SLOW-NEXT:    # xmm1 = mem[0,0]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -804,9 +831,11 @@
 ;
 ; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16:
 ; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vmovapd {{.*#+}} xmm1 = [65535,65535]
-; AVX2-FAST-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-FAST-NEXT:    vmovddup {{.*#+}} xmm1 = [65535,65535]
+; AVX2-FAST-NEXT:    # xmm1 = mem[0,0]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
@@ -922,9 +951,11 @@
 ;
 ; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store:
 ; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vmovapd {{.*#+}} xmm1 = [65535,65535]
-; AVX2-SLOW-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-SLOW-NEXT:    vmovddup {{.*#+}} xmm1 = [65535,65535]
+; AVX2-SLOW-NEXT:    # xmm1 = mem[0,0]
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
 ; AVX2-SLOW-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX2-SLOW-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -934,9 +965,11 @@
 ;
 ; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store:
 ; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vmovapd {{.*#+}} xmm1 = [65535,65535]
-; AVX2-FAST-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-FAST-NEXT:    vmovddup {{.*#+}} xmm1 = [65535,65535]
+; AVX2-FAST-NEXT:    # xmm1 = mem[0,0]
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
 ; AVX2-FAST-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX2-FAST-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
@@ -2098,15 +2131,27 @@
 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_usat_v2i64_v2i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [255,255]
-; AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
-; AVX-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_usat_v2i64_v2i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovapd {{.*#+}} xmm1 = [255,255]
+; AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_usat_v2i64_v2i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [255,255]
+; AVX2-NEXT:    # xmm1 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_usat_v2i64_v2i8:
 ; AVX512F:       # %bb.0:
@@ -2206,16 +2251,29 @@
 ; SSE41-NEXT:    pextrw $0, %xmm2, (%rdi)
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: trunc_usat_v2i64_v2i8_store:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [255,255]
-; AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
-; AVX-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT:    vpextrw $0, %xmm0, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: trunc_usat_v2i64_v2i8_store:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovapd {{.*#+}} xmm1 = [255,255]
+; AVX1-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_usat_v2i64_v2i8_store:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [255,255]
+; AVX2-NEXT:    # xmm1 = mem[0,0]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpextrw $0, %xmm0, (%rdi)
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store:
 ; AVX512F:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
--- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
@@ -2680,11 +2680,18 @@
 ; X64-SSE2-NEXT:    psrlq $1, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
   %t1 = lshr <2 x i64> %t0, <i64 1, i64 1>
   ret <2 x i64> %t1
@@ -2709,11 +2716,18 @@
 ; X64-SSE2-NEXT:    psrlq $15, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $15, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $15, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $15, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = lshr <2 x i64> %t0, <i64 15, i64 15>
   ret <2 x i64> %t1
@@ -2737,11 +2751,18 @@
 ; X64-SSE2-NEXT:    psrlq $16, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $16, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $16, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = lshr <2 x i64> %t0, <i64 16, i64 16>
   ret <2 x i64> %t1
@@ -2765,11 +2786,18 @@
 ; X64-SSE2-NEXT:    psrlq $17, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $17, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $17, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $17, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = lshr <2 x i64> %t0, <i64 17, i64 17>
   ret <2 x i64> %t1
@@ -2793,11 +2821,18 @@
 ; X64-SSE2-NEXT:    psrlq $18, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $18, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $18, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $18, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = lshr <2 x i64> %t0, <i64 18, i64 18>
   ret <2 x i64> %t1
@@ -2822,11 +2857,18 @@
 ; X64-SSE2-NEXT:    psrlq $1, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
   %t1 = lshr <2 x i64> %t0, <i64 1, i64 1>
   ret <2 x i64> %t1
@@ -2857,11 +2899,18 @@
 ; X64-SSE2-NEXT:    psrlq $32, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
   %t1 = lshr <2 x i64> %t0, <i64 32, i64 32>
   ret <2 x i64> %t1
@@ -2936,11 +2985,18 @@
 ; X64-SSE2-NEXT:    psrlq $1, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
   %t1 = ashr <2 x i64> %t0, <i64 1, i64 1>
   ret <2 x i64> %t1
@@ -2965,11 +3021,18 @@
 ; X64-SSE2-NEXT:    psrlq $15, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $15, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $15, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $15, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = ashr <2 x i64> %t0, <i64 15, i64 15>
   ret <2 x i64> %t1
@@ -2993,11 +3056,18 @@
 ; X64-SSE2-NEXT:    psrlq $16, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $16, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $16, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = ashr <2 x i64> %t0, <i64 16, i64 16>
   ret <2 x i64> %t1
@@ -3021,11 +3091,18 @@
 ; X64-SSE2-NEXT:    psrlq $17, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $17, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $17, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $17, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = ashr <2 x i64> %t0, <i64 17, i64 17>
   ret <2 x i64> %t1
@@ -3049,11 +3126,18 @@
 ; X64-SSE2-NEXT:    psrlq $18, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlq $18, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlq $18, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlq $18, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = ashr <2 x i64> %t0, <i64 18, i64 18>
   ret <2 x i64> %t1
@@ -3099,7 +3183,8 @@
 ;
 ; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpsrad $1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X64-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
@@ -3155,7 +3240,8 @@
 ;
 ; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm1
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; X64-AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
@@ -3298,11 +3384,18 @@
 ; X64-SSE2-NEXT:    paddq %xmm0, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
   %t1 = shl <2 x i64> %t0, <i64 1, i64 1>
   ret <2 x i64> %t1
@@ -3333,11 +3426,18 @@
 ; X64-SSE2-NEXT:    psllq $32, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $32, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
   %t1 = shl <2 x i64> %t0, <i64 32, i64 32>
   ret <2 x i64> %t1
@@ -3410,11 +3510,18 @@
 ; X64-SSE2-NEXT:    psllq $15, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $15, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $15, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $15, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = shl <2 x i64> %t0, <i64 15, i64 15>
   ret <2 x i64> %t1
@@ -3438,11 +3545,18 @@
 ; X64-SSE2-NEXT:    psllq $16, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $16, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $16, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = shl <2 x i64> %t0, <i64 16, i64 16>
   ret <2 x i64> %t1
@@ -3466,11 +3580,18 @@
 ; X64-SSE2-NEXT:    psllq $17, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $17, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $17, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $17, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = shl <2 x i64> %t0, <i64 17, i64 17>
   ret <2 x i64> %t1
@@ -3494,11 +3615,18 @@
 ; X64-SSE2-NEXT:    psllq $18, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsllq $18, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsllq $18, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [140737488289792,140737488289792]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsllq $18, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
   %t1 = shl <2 x i64> %t0, <i64 18, i64 18>
   ret <2 x i64> %t1
@@ -3523,11 +3651,18 @@
 ; X64-SSE2-NEXT:    paddq %xmm0, %xmm0
 ; X64-SSE2-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [18446744065119617024,18446744065119617024]
+; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
   %t1 = shl <2 x i64> %t0, <i64 1, i64 1>
   ret <2 x i64> %t1
diff --git a/llvm/test/CodeGen/X86/vselect-minmax.ll b/llvm/test/CodeGen/X86/vselect-minmax.ll
--- a/llvm/test/CodeGen/X86/vselect-minmax.ll
+++ b/llvm/test/CodeGen/X86/vselect-minmax.ll
@@ -9549,7 +9549,7 @@
 ;
 ; AVX2-LABEL: test181:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -9617,7 +9617,7 @@
 ;
 ; AVX2-LABEL: test182:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -9685,7 +9685,7 @@
 ;
 ; AVX2-LABEL: test183:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -9753,7 +9753,7 @@
 ;
 ; AVX2-LABEL: test184:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -10055,7 +10055,7 @@
 ;
 ; AVX2-LABEL: test189:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -10123,7 +10123,7 @@
 ;
 ; AVX2-LABEL: test190:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -10191,7 +10191,7 @@
 ;
 ; AVX2-LABEL: test191:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -10259,7 +10259,7 @@
 ;
 ; AVX2-LABEL: test192:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
 ; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2