Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -735,8 +735,8 @@
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
-    addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
+    addRegisterClass(MVT::v4f32, Subtarget.hasAVX512() ? &X86::VR128XRegClass
+                                                       : &X86::VR128RegClass);
 
     setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
     setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
@@ -750,19 +750,19 @@
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
-    addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
+    addRegisterClass(MVT::v2f64, Subtarget.hasAVX512() ? &X86::VR128XRegClass
+                                                       : &X86::VR128RegClass);
 
     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
     // registers cannot be used even for integer operations.
-    addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
-    addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
-    addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
-    addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
+    addRegisterClass(MVT::v16i8, Subtarget.hasAVX512() ? &X86::VR128XRegClass
+                                                       : &X86::VR128RegClass);
+    addRegisterClass(MVT::v8i16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
+                                                       : &X86::VR128RegClass);
+    addRegisterClass(MVT::v4i32, Subtarget.hasAVX512() ? &X86::VR128XRegClass
+                                                       : &X86::VR128RegClass);
+    addRegisterClass(MVT::v2i64, Subtarget.hasAVX512() ? &X86::VR128XRegClass
+                                                       : &X86::VR128RegClass);
 
     setOperationAction(ISD::MUL,                MVT::v16i8, Custom);
     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
@@ -971,18 +971,18 @@
   if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
     bool HasInt256 = Subtarget.hasInt256();
 
-    addRegisterClass(MVT::v32i8,  Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                     : &X86::VR256RegClass);
-    addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                     : &X86::VR256RegClass);
-    addRegisterClass(MVT::v8i32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                     : &X86::VR256RegClass);
-    addRegisterClass(MVT::v8f32,  Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                     : &X86::VR256RegClass);
-    addRegisterClass(MVT::v4i64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                     : &X86::VR256RegClass);
-    addRegisterClass(MVT::v4f64,  Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                     : &X86::VR256RegClass);
+    addRegisterClass(MVT::v32i8,  Subtarget.hasAVX512() ? &X86::VR256XRegClass
+                                                        : &X86::VR256RegClass);
+    addRegisterClass(MVT::v16i16, Subtarget.hasAVX512() ? &X86::VR256XRegClass
+                                                        : &X86::VR256RegClass);
+    addRegisterClass(MVT::v8i32,  Subtarget.hasAVX512() ? &X86::VR256XRegClass
+                                                        : &X86::VR256RegClass);
+    addRegisterClass(MVT::v8f32,  Subtarget.hasAVX512() ? &X86::VR256XRegClass
+                                                        : &X86::VR256RegClass);
+    addRegisterClass(MVT::v4i64,  Subtarget.hasAVX512() ? &X86::VR256XRegClass
+                                                        : &X86::VR256RegClass);
+    addRegisterClass(MVT::v4f64,  Subtarget.hasAVX512() ? &X86::VR256XRegClass
+                                                        : &X86::VR256RegClass);
 
     for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
       setOperationAction(ISD::FFLOOR,     VT, Legal);
Index: lib/Target/X86/X86RegisterInfo.cpp
===================================================================
--- lib/Target/X86/X86RegisterInfo.cpp
+++ lib/Target/X86/X86RegisterInfo.cpp
@@ -136,23 +136,14 @@
     switch (Super->getID()) {
     case X86::FR32RegClassID:
     case X86::FR64RegClassID:
-      // If AVX-512 isn't supported we should only inflate to these classes.
-      if (!Subtarget.hasAVX512() && Super->getSize() == RC->getSize())
-        return Super;
-      break;
     case X86::VR128RegClassID:
     case X86::VR256RegClassID:
-      // If VLX isn't supported we should only inflate to these classes.
-      if (!Subtarget.hasVLX() && Super->getSize() == RC->getSize())
+      // If AVX-512 isn't supported we should only inflate to these classes.
+      if (!Subtarget.hasAVX512() && Super->getSize() == RC->getSize())
         return Super;
       break;
     case X86::FR32XRegClassID:
     case X86::FR64XRegClassID:
-      // If VLX isn't support we shouldn't inflate to these classes.
-      if (!Subtarget.hasVLX())
-        break;
-      // The VLX check above passed, AVX512 check below will pass.
-      LLVM_FALLTHROUGH;
     case X86::VR128XRegClassID:
     case X86::VR256XRegClassID:
       // If AVX-512 isn't support we shouldn't inflate to these classes.
Index: test/CodeGen/X86/avx512-extended-xmm.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/avx512-extended-xmm.ll
@@ -0,0 +1,256 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK
+
+; This test uses vpmovsxbd and vpmovdb to read and write xmm registers. These instructions should be able to use XMM16-31 even without VLX.
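+; (Illustrative, not checked by FileCheck: EVEX-encoded AVX-512F forms such as
+;   vpmovsxbd %xmm16, %zmm0  and  vpmovdb %zmm0, %xmm16
+; can encode XMM16-31, so the VR128X register class is usable here even
+; without VLX.)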
+
+define <16 x i8> @foo(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> %a3, <16 x i8> %a4, <16 x i8> %a5) nounwind {
+; CHECK-LABEL: foo:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovdqa %xmm4, %xmm14
+; CHECK-NEXT:    vmovdqa %xmm3, %xmm10
+; CHECK-NEXT:    vmovdqa %xmm2, %xmm13
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm8
+; CHECK-NEXT:    vpaddb %xmm1, %xmm8, %xmm6
+; CHECK-NEXT:    vmovdqa %xmm1, %xmm11
+; CHECK-NEXT:    vpaddb %xmm10, %xmm13, %xmm2
+; CHECK-NEXT:    vpaddb %xmm5, %xmm14, %xmm4
+; CHECK-NEXT:    vpmovsxbw %xmm2, %ymm3
+; CHECK-NEXT:    vpmovsxbw %xmm6, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm6, %zmm21
+; CHECK-NEXT:    vpmullw %ymm3, %ymm0, %ymm7
+; CHECK-NEXT:    vmovdqa64 %zmm3, %zmm20
+; CHECK-NEXT:    vpmovsxwd %ymm7, %zmm7
+; CHECK-NEXT:    vpmovdb %zmm7, %xmm12
+; CHECK-NEXT:    vmovdqa %xmm4, %xmm6
+; CHECK-NEXT:    vpaddb %xmm12, %xmm6, %xmm1
+; CHECK-NEXT:    vpmovsxbw %xmm1, %ymm7
+; CHECK-NEXT:    vmovdqa %xmm1, %xmm9
+; CHECK-NEXT:    vpmullw %ymm0, %ymm7, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm15
+; CHECK-NEXT:    vpaddb %xmm2, %xmm15, %xmm4
+; CHECK-NEXT:    vpmovsxbw %xmm4, %ymm2
+; CHECK-NEXT:    vpmovsxbw %xmm6, %ymm7
+; CHECK-NEXT:    vpmullw %ymm7, %ymm2, %ymm2
+; CHECK-NEXT:    vpmovsxwd %ymm2, %zmm2
+; CHECK-NEXT:    vpmovdb %zmm2, %xmm3
+; CHECK-NEXT:    vpaddb %xmm8, %xmm3, %xmm1
+; CHECK-NEXT:    vpmovsxbw %xmm1, %ymm2
+; CHECK-NEXT:    vpmovsxbw %xmm11, %ymm7
+; CHECK-NEXT:    vmovdqa64 %zmm11, %zmm28
+; CHECK-NEXT:    vpmullw %ymm7, %ymm2, %ymm2
+; CHECK-NEXT:    vpmovsxwd %ymm2, %zmm2
+; CHECK-NEXT:    vpmovsxbw %xmm10, %ymm11
+; CHECK-NEXT:    vpmovdb %zmm2, %xmm2
+; CHECK-NEXT:    vpaddb %xmm13, %xmm2, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm25
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm2
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm17
+; CHECK-NEXT:    vpmullw %ymm11, %ymm2, %ymm2
+; CHECK-NEXT:    vpmovsxwd %ymm2, %zmm16
+; CHECK-NEXT:    vpmovsxbw %xmm5, %ymm10
+; CHECK-NEXT:    vpmovdb %zmm16, %xmm5
+; CHECK-NEXT:    vpaddb %xmm14, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm5, %zmm19
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm5
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm18
+; CHECK-NEXT:    vpmullw %ymm10, %ymm5, %ymm5
+; CHECK-NEXT:    vpmovsxwd %ymm5, %zmm5
+; CHECK-NEXT:    vpmovdb %zmm5, %xmm5
+; CHECK-NEXT:    vpaddb %xmm1, %xmm5, %xmm5
+; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm22
+; CHECK-NEXT:    vpmovsxbw %xmm5, %ymm5
+; CHECK-NEXT:    vpmovsxbw %xmm3, %ymm1
+; CHECK-NEXT:    vpmullw %ymm1, %ymm5, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm23
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm4, %zmm24
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmovsxbw %xmm15, %ymm1
+; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm26
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm9, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm9, %zmm27
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmovsxbw %xmm12, %ymm1
+; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm29
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm6, %xmm0, %xmm1
+; CHECK-NEXT:    vpmovsxbw %xmm1, %ymm5
+; CHECK-NEXT:    vmovdqa64 %zmm20, %zmm12
+; CHECK-NEXT:    vpmullw %ymm12, %ymm5, %ymm5
+; CHECK-NEXT:    vpmovsxwd %ymm5, %zmm5
+; CHECK-NEXT:    vpmovdb %zmm5, %xmm5
+; CHECK-NEXT:    vmovdqa64 %zmm21, %zmm2
+; CHECK-NEXT:    vpaddb %xmm2, %xmm5, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm9
+; CHECK-NEXT:    vpmullw %ymm10, %ymm9, %ymm15
+; CHECK-NEXT:    vpmovsxwd %ymm15, %zmm15
+; CHECK-NEXT:    vpmovdb %zmm15, %xmm5
+; CHECK-NEXT:    vpaddb %xmm14, %xmm5, %xmm5
+; CHECK-NEXT:    vpmovsxbw %xmm5, %ymm5
+; CHECK-NEXT:    vpmullw %ymm11, %ymm5, %ymm5
+; CHECK-NEXT:    vpmovsxwd %ymm5, %zmm5
+; CHECK-NEXT:    vpmovdb %zmm5, %xmm5
+; CHECK-NEXT:    vpaddb %xmm13, %xmm5, %xmm5
+; CHECK-NEXT:    vpmovsxbw %xmm5, %ymm5
+; CHECK-NEXT:    vpmullw %ymm7, %ymm5, %ymm5
+; CHECK-NEXT:    vpmovsxwd %ymm5, %zmm5
+; CHECK-NEXT:    vpmovsxbd %xmm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm5, %xmm5
+; CHECK-NEXT:    vpaddb %xmm8, %xmm5, %xmm5
+; CHECK-NEXT:    vpmovsxbd %xmm5, %zmm7
+; CHECK-NEXT:    vmovdqa64 %zmm25, %zmm3
+; CHECK-NEXT:    vpmovsxbd %xmm3, %zmm15
+; CHECK-NEXT:    vpmovsxbd %xmm1, %zmm16
+; CHECK-NEXT:    vpaddd %zmm0, %zmm16, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm15, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm7, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmovsxbw %xmm8, %ymm4
+; CHECK-NEXT:    vpmullw %ymm4, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 %zmm28, %zmm4
+; CHECK-NEXT:    vpaddb %xmm13, %xmm4, %xmm4
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmullw %ymm11, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm14, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmullw %ymm10, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmullw %ymm12, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm5, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmovsxbw %xmm3, %ymm2
+; CHECK-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmullw %ymm9, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm6, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm29, %zmm1
+; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm27, %zmm1
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm26, %zmm1
+; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm24, %zmm1
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm23, %zmm1
+; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm22, %zmm1
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm17, %zmm1
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vmovdqa64 %zmm19, %zmm1
+; CHECK-NEXT:    vpmovsxbw %xmm1, %ymm1
+; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vmovdqa64 %zmm18, %zmm1
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %1 = add <16 x i8> %a0, %a1
+  %2 = add <16 x i8> %a2, %a3
+  %3 = add <16 x i8> %a4, %a5
+  %4 = mul <16 x i8> %1, %2
+  %5 = add <16 x i8> %3, %4
+  %6 = mul <16 x i8> %5, %1
+  %7 = add <16 x i8> %6, %2
+  %8 = mul <16 x i8> %7, %3
+  %9 = add <16 x i8> %8, %a0
+  %10 = mul <16 x i8> %9, %a1
+  %11 = add <16 x i8> %10, %a2
+  %12 = mul <16 x i8> %11, %a3
+  %13 = add <16 x i8> %12, %a4
+  %14 = mul <16 x i8> %13, %a5
+  %15 = add <16 x i8> %14, %9
+  %16 = mul <16 x i8> %15, %8
+  %17 = add <16 x i8> %16, %7
+  %18 = mul <16 x i8> %17, %6
+  %19 = add <16 x i8> %18, %5
+  %20 = mul <16 x i8> %19, %4
+  %21 = add <16 x i8> %20, %3
+  %22 = mul <16 x i8> %21, %2
+  %23 = add <16 x i8> %22, %1
+  %24 = mul <16 x i8> %23, %a5
+  %25 = add <16 x i8> %24, %a4
+  %26 = mul <16 x i8> %25, %a3
+  %27 = add <16 x i8> %26, %a2
+  %28 = mul <16 x i8> %27, %a1
+  %29 = add <16 x i8> %28, %a0
+  %30 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %29, <16 x i32> undef, i16 -1)
+  %31 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %10, <16 x i32> undef, i16 -1)
+  %32 = add <16 x i32> %30, %31
+  %33 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %21, <16 x i32> undef, i16 -1)
+  %34 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %23, <16 x i32> undef, i16 -1)
+  %35 = add <16 x i32> %33, %34
+  %36 = add <16 x i32> %32, %35
+  %37 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %36, <16 x i8> undef, i16 -1)
+  %38 = mul <16 x i8> %37, %a0
+  %39 = add <16 x i8> %38, %a1
+  %40 = add <16 x i8> %39, %a2
+  %41 = mul <16 x i8> %40, %a3
+  %42 = add <16 x i8> %41, %a4
+  %43 = mul <16 x i8> %42, %a5
+  %44 = add <16 x i8> %43, %1
+  %45 = mul <16 x i8> %44, %2
+  %46 = add <16 x i8> %45, %29
+  %47 = mul <16 x i8> %46, %10
+  %48 = add <16 x i8> %47, %21
+  %49 = mul <16 x i8> %48, %23
+  %50 = add <16 x i8> %49, %3
+  %51 = mul <16 x i8> %50, %4
+  %52 = add <16 x i8> %51, %5
+  %53 = mul <16 x i8> %52, %6
+  %54 = add <16 x i8> %53, %7
+  %55 = mul <16 x i8> %54, %8
+  %56 = add <16 x i8> %55, %9
+  %57 = mul <16 x i8> %56, %10
+  %58 = add <16 x i8> %57, %11
+  %59 = mul <16 x i8> %58, %12
+  %60 = add <16 x i8> %59, %13
+
+  ret <16 x i8> %60
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
+declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)
+
Index: test/CodeGen/X86/vector-half-conversions.ll
===================================================================
--- test/CodeGen/X86/vector-half-conversions.ll
+++ test/CodeGen/X86/vector-half-conversions.ll
@@ -3350,69 +3350,69 @@
 ;
 ; AVX512F-LABEL: cvt_16f32_to_16i16:
 ; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm2
-; AVX512F-NEXT:    vmovd %xmm2, %eax
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT:    vmovd %eax, %xmm3
-; AVX512F-NEXT:    vmovd %xmm2, %eax
+; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT:    vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm2, %eax
-; AVX512F-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm14
+; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm3, %ymm3
+; AVX512F-NEXT:    vpermilps {{.*#+}} xmm4 = xmm0[3,1,2,3]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm4, %ymm4
+; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm5, %ymm5
+; AVX512F-NEXT:    vextractf64x4 $1, %zmm0, %ymm7
+; AVX512F-NEXT:    vextractf128 $1, %ymm7, %xmm8
+; AVX512F-NEXT:    vpermilps {{.*#+}} xmm6 = xmm8[3,1,2,3]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm6, %ymm6
+; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm9 = xmm8[1,0]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm9, %ymm9
+; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm10 = xmm8[1,1,3,3]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm10, %ymm10
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm8, %ymm8
+; AVX512F-NEXT:    vpermilps {{.*#+}} xmm11 = xmm7[3,1,2,3]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm11, %ymm11
+; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm12 = xmm7[1,0]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm12, %ymm12
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm7, %ymm13
+; AVX512F-NEXT:    vmovd %xmm13, %eax
+; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm7, %ymm7
+; AVX512F-NEXT:    vmovd %eax, %xmm2
+; AVX512F-NEXT:    vmovd %xmm7, %eax
+; AVX512F-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX512F-NEXT:    vmovd %xmm12, %eax
+; AVX512F-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX512F-NEXT:    vmovd %xmm11, %eax
+; AVX512F-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX512F-NEXT:    vmovd %xmm8, %eax
+; AVX512F-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX512F-NEXT:    vmovd %xmm10, %eax
+; AVX512F-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX512F-NEXT:    vmovd %xmm9, %eax
+; AVX512F-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX512F-NEXT:    vmovd %xmm6, %eax
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm6
+; AVX512F-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm2
+; AVX512F-NEXT:    vmovd %xmm6, %eax
+; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
+; AVX512F-NEXT:    vmovd %eax, %xmm6
+; AVX512F-NEXT:    vmovd %xmm0, %eax
+; AVX512F-NEXT:    vpinsrw $1, %eax, %xmm6, %xmm0
+; AVX512F-NEXT:    vmovd %xmm5, %eax
+; AVX512F-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovd %xmm4, %eax
+; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm4
+; AVX512F-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovd %xmm4, %eax
+; AVX512F-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovd %xmm3, %eax
+; AVX512F-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovd %xmm14, %eax
 ; AVX512F-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT:    vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm1, %eax
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm1
-; AVX512F-NEXT:    vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm1, %eax
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT:    vpinsrw $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm1, %eax
-; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT:    vpinsrw $5, %eax, %xmm3, %xmm3
+; AVX512F-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT:    vmovd %xmm1, %eax
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm1
-; AVX512F-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT:    vpinsrw $6, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm2, %eax
-; AVX512F-NEXT:    vpinsrw $7, %eax, %xmm3, %xmm2
-; AVX512F-NEXT:    vmovd %xmm1, %eax
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT:    vmovd %eax, %xmm3
-; AVX512F-NEXT:    vmovd %xmm1, %eax
-; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT:    vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm1, %eax
-; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT:    vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm0, %eax
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm0
-; AVX512F-NEXT:    vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm0, %eax
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT:    vpinsrw $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm0, %eax
-; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT:    vpinsrw $5, %eax, %xmm3, %xmm3
-; AVX512F-NEXT:    vmovd %xmm0, %eax
-; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT:    vpinsrw $6, %eax, %xmm3, %xmm1
-; AVX512F-NEXT:    vmovd %xmm0, %eax
-; AVX512F-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX512F-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;