Index: llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp
@@ -184,6 +184,7 @@
     return;
 
   const LLT s64 = LLT::scalar(64);
+  const LLT v16s8 = LLT::vector(16, 8);
   const LLT v8s16 = LLT::vector(8, 16);
   const LLT v4s32 = LLT::vector(4, 32);
   const LLT v2s64 = LLT::vector(2, 64);
@@ -193,7 +194,7 @@
       setAction({BinOp, Ty}, Legal);
 
   for (unsigned BinOp : {G_ADD, G_SUB})
-    for (auto Ty : {v4s32})
+    for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
       setAction({BinOp, Ty}, Legal);
 
   setAction({G_MUL, v8s16}, Legal);
@@ -212,8 +213,14 @@
   if (!Subtarget.hasAVX2())
     return;
 
+  const LLT v32s8 = LLT::vector(32, 8);
   const LLT v16s16 = LLT::vector(16, 16);
   const LLT v8s32 = LLT::vector(8, 32);
+  const LLT v4s64 = LLT::vector(4, 64);
+
+  for (unsigned BinOp : {G_ADD, G_SUB})
+    for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+      setAction({BinOp, Ty}, Legal);
 
   for (auto Ty : {v16s16, v8s32})
     setAction({G_MUL, Ty}, Legal);
@@ -224,6 +231,11 @@
     return;
 
   const LLT v16s32 = LLT::vector(16, 32);
+  const LLT v8s64 = LLT::vector(8, 64);
+
+  for (unsigned BinOp : {G_ADD, G_SUB})
+    for (auto Ty : {v16s32, v8s64})
+      setAction({BinOp, Ty}, Legal);
 
   setAction({G_MUL, v16s32}, Legal);
 
@@ -261,8 +273,13 @@
   if (!(Subtarget.hasAVX512() && Subtarget.hasBWI()))
     return;
 
+  const LLT v64s8 = LLT::vector(64, 8);
   const LLT v32s16 = LLT::vector(32, 16);
 
+  for (unsigned BinOp : {G_ADD, G_SUB})
+    for (auto Ty : {v64s8, v32s16})
+      setAction({BinOp, Ty}, Legal);
+
   setAction({G_MUL, v32s16}, Legal);
 
   /************ VLX *******************/
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/add-vec.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/add-vec.ll
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/add-vec.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel < %s -o - | FileCheck %s --check-prefix=SKX
+
+define <16 x i8> @test_add_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+; SKX-LABEL: test_add_v16i8:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %ret = add <16 x i8> %arg1, %arg2
+  ret <16 x i8> %ret
+}
+
+define <8 x i16> @test_add_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+; SKX-LABEL: test_add_v8i16:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %ret = add <8 x i16> %arg1, %arg2
+  ret <8 x i16> %ret
+}
+
+define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+; SKX-LABEL: test_add_v4i32:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %ret = add <4 x i32> %arg1, %arg2
+  ret <4 x i32> %ret
+}
+
+define <2 x i64> @test_add_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) {
+; SKX-LABEL: test_add_v2i64:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %ret = add <2 x i64> %arg1, %arg2
+  ret <2 x i64> %ret
+}
+
+define <32 x i8> @test_add_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) {
+; SKX-LABEL: test_add_v32i8:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %ret = add <32 x i8> %arg1, %arg2
+  ret <32 x i8> %ret
+}
+
+define <16 x i16> @test_add_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) {
+; SKX-LABEL: test_add_v16i16:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %ret = add <16 x i16> %arg1, %arg2
+  ret <16 x i16> %ret
+}
+
+define <8 x i32> @test_add_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) {
+; SKX-LABEL: test_add_v8i32:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %ret = add <8 x i32> %arg1, %arg2
+  ret <8 x i32> %ret
+}
+
+define <4 x i64> @test_add_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) {
+; SKX-LABEL: test_add_v4i64:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %ret = add <4 x i64> %arg1, %arg2
+  ret <4 x i64> %ret
+}
+
+define <64 x i8> @test_add_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) {
+; SKX-LABEL: test_add_v64i8:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %ret = add <64 x i8> %arg1, %arg2
+  ret <64 x i8> %ret
+}
+
+define <32 x i16> @test_add_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) {
+; SKX-LABEL: test_add_v32i16:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %ret = add <32 x i16> %arg1, %arg2
+  ret <32 x i16> %ret
+}
+
+define <16 x i32> @test_add_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) {
+; SKX-LABEL: test_add_v16i32:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %ret = add <16 x i32> %arg1, %arg2
+  ret <16 x i32> %ret
+}
+
+define <8 x i64> @test_add_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) {
+; SKX-LABEL: test_add_v8i64:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %ret = add <8 x i64> %arg1, %arg2
+  ret <8 x i64> %ret
+}
+
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+
+--- |
+  define void @test_add_v16i8() {
+    %ret = add <16 x i8> undef, undef
+    ret void
+  }
+
+  define void @test_add_v8i16() {
+    %ret = add <8 x i16> undef, undef
+    ret void
+  }
+
+  define void @test_add_v4i32() {
+    %ret = add <4 x i32> undef, undef
+    ret void
+  }
+
+  define void @test_add_v2i64() {
+    %ret = add <2 x i64> undef, undef
+    ret void
+  }
+...
+---
+name:            test_add_v16i8
+# ALL-LABEL: name:  test_add_v16i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<16 x s8>) = G_ADD %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<16 x s8>) = IMPLICIT_DEF
+    %1(<16 x s8>) = IMPLICIT_DEF
+    %2(<16 x s8>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v8i16
+# ALL-LABEL: name:  test_add_v8i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<8 x s16>) = G_ADD %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<8 x s16>) = IMPLICIT_DEF
+    %1(<8 x s16>) = IMPLICIT_DEF
+    %2(<8 x s16>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v4i32
+# ALL-LABEL: name:  test_add_v4i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<4 x s32>) = G_ADD %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<4 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = IMPLICIT_DEF
+    %2(<4 x s32>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v2i64
+# ALL-LABEL: name:  test_add_v2i64
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<2 x s64>) = G_ADD %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<2 x s64>) = IMPLICIT_DEF
+    %1(<2 x s64>) = IMPLICIT_DEF
+    %2(<2 x s64>) = G_ADD %0, %1
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir
@@ -0,0 +1,157 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+
+--- |
+  define void @test_add_v32i8() {
+    %ret = add <32 x i8> undef, undef
+    ret void
+  }
+
+  define void @test_add_v16i16() {
+    %ret = add <16 x i16> undef, undef
+    ret void
+  }
+
+  define void @test_add_v8i32() {
+    %ret = add <8 x i32> undef, undef
+    ret void
+  }
+
+  define void @test_add_v4i64() {
+    %ret = add <4 x i64> undef, undef
+    ret void
+  }
+
+...
+---
+name:            test_add_v32i8
+# ALL-LABEL: name:  test_add_v32i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX1:          %0(<32 x s8>) = IMPLICIT_DEF
+# AVX1-NEXT:     %1(<32 x s8>) = IMPLICIT_DEF
+# AVX1-NEXT:     %3(<16 x s8>), %4(<16 x s8>) = G_UNMERGE_VALUES %0(<32 x s8>)
+# AVX1-NEXT:     %5(<16 x s8>), %6(<16 x s8>) = G_UNMERGE_VALUES %1(<32 x s8>)
+# AVX1-NEXT:     %7(<16 x s8>) = G_ADD %3, %5
+# AVX1-NEXT:     %8(<16 x s8>) = G_ADD %4, %6
+# AVX1-NEXT:     %2(<32 x s8>) = G_MERGE_VALUES %7(<16 x s8>), %8(<16 x s8>)
+# AVX1-NEXT:     RET 0
+#
+# AVX2:          %0(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<32 x s8>) = G_ADD %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<32 x s8>) = IMPLICIT_DEF
+    %1(<32 x s8>) = IMPLICIT_DEF
+    %2(<32 x s8>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v16i16
+# ALL-LABEL: name:  test_add_v16i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX1:          %0(<16 x s16>) = IMPLICIT_DEF
+# AVX1-NEXT:     %1(<16 x s16>) = IMPLICIT_DEF
+# AVX1-NEXT:     %3(<8 x s16>), %4(<8 x s16>) = G_UNMERGE_VALUES %0(<16 x s16>)
+# AVX1-NEXT:     %5(<8 x s16>), %6(<8 x s16>) = G_UNMERGE_VALUES %1(<16 x s16>)
+# AVX1-NEXT:     %7(<8 x s16>) = G_ADD %3, %5
+# AVX1-NEXT:     %8(<8 x s16>) = G_ADD %4, %6
+# AVX1-NEXT:     %2(<16 x s16>) = G_MERGE_VALUES %7(<8 x s16>), %8(<8 x s16>)
+# AVX1-NEXT:     RET 0
+#
+# AVX2:          %0(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<16 x s16>) = G_ADD %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s16>) = IMPLICIT_DEF
+    %1(<16 x s16>) = IMPLICIT_DEF
+    %2(<16 x s16>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v8i32
+# ALL-LABEL: name:  test_add_v8i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX1:          %0(<8 x s32>) = IMPLICIT_DEF
+# AVX1-NEXT:     %1(<8 x s32>) = IMPLICIT_DEF
+# AVX1-NEXT:     %3(<4 x s32>), %4(<4 x s32>) = G_UNMERGE_VALUES %0(<8 x s32>)
+# AVX1-NEXT:     %5(<4 x s32>), %6(<4 x s32>) = G_UNMERGE_VALUES %1(<8 x s32>)
+# AVX1-NEXT:     %7(<4 x s32>) = G_ADD %3, %5
+# AVX1-NEXT:     %8(<4 x s32>) = G_ADD %4, %6
+# AVX1-NEXT:     %2(<8 x s32>) = G_MERGE_VALUES %7(<4 x s32>), %8(<4 x s32>)
+# AVX1-NEXT:     RET 0
+#
+# AVX2:          %0(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<8 x s32>) = G_ADD %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = IMPLICIT_DEF
+    %2(<8 x s32>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v4i64
+# ALL-LABEL: name:  test_add_v4i64
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX1:          %0(<4 x s64>) = IMPLICIT_DEF
+# AVX1-NEXT:     %1(<4 x s64>) = IMPLICIT_DEF
+# AVX1-NEXT:     %3(<2 x s64>), %4(<2 x s64>) = G_UNMERGE_VALUES %0(<4 x s64>)
+# AVX1-NEXT:     %5(<2 x s64>), %6(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>)
+# AVX1-NEXT:     %7(<2 x s64>) = G_ADD %3, %5
+# AVX1-NEXT:     %8(<2 x s64>) = G_ADD %4, %6
+# AVX1-NEXT:     %2(<4 x s64>) = G_MERGE_VALUES %7(<2 x s64>), %8(<2 x s64>)
+# AVX1-NEXT:     RET 0
+#
+# AVX2:          %0(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<4 x s64>) = G_ADD %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<4 x s64>) = IMPLICIT_DEF
+    %1(<4 x s64>) = IMPLICIT_DEF
+    %2(<4 x s64>) = G_ADD %0, %1
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
@@ -0,0 +1,139 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f           -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512bw -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
+
+--- |
+  define void @test_add_v64i8() {
+    %ret = add <64 x i8> undef, undef
+    ret void
+  }
+
+  define void @test_add_v32i16() {
+    %ret = add <32 x i16> undef, undef
+    ret void
+  }
+
+  define void @test_add_v16i32() {
+    %ret = add <16 x i32> undef, undef
+    ret void
+  }
+
+  define void @test_add_v8i64() {
+    %ret = add <8 x i64> undef, undef
+    ret void
+  }
+
+...
+---
+name:            test_add_v64i8
+# ALL-LABEL: name:  test_add_v64i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX512F:          %0(<64 x s8>) = IMPLICIT_DEF
+# AVX512F-NEXT:     %1(<64 x s8>) = IMPLICIT_DEF
+# AVX512F-NEXT:     %3(<32 x s8>), %4(<32 x s8>) = G_UNMERGE_VALUES %0(<64 x s8>)
+# AVX512F-NEXT:     %5(<32 x s8>), %6(<32 x s8>) = G_UNMERGE_VALUES %1(<64 x s8>)
+# AVX512F-NEXT:     %7(<32 x s8>) = G_ADD %3, %5
+# AVX512F-NEXT:     %8(<32 x s8>) = G_ADD %4, %6
+# AVX512F-NEXT:     %2(<64 x s8>) = G_MERGE_VALUES %7(<32 x s8>), %8(<32 x s8>)
+# AVX512F-NEXT:     RET 0
+#
+# AVX512BW:          %0(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %1(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %2(<64 x s8>) = G_ADD %0, %1
+# AVX512BW-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<64 x s8>) = IMPLICIT_DEF
+    %1(<64 x s8>) = IMPLICIT_DEF
+    %2(<64 x s8>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v32i16
+# ALL-LABEL: name:  test_add_v32i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX512F:          %0(<32 x s16>) = IMPLICIT_DEF
+# AVX512F-NEXT:     %1(<32 x s16>) = IMPLICIT_DEF
+# AVX512F-NEXT:     %3(<16 x s16>), %4(<16 x s16>) = G_UNMERGE_VALUES %0(<32 x s16>)
+# AVX512F-NEXT:     %5(<16 x s16>), %6(<16 x s16>) = G_UNMERGE_VALUES %1(<32 x s16>)
+# AVX512F-NEXT:     %7(<16 x s16>) = G_ADD %3, %5
+# AVX512F-NEXT:     %8(<16 x s16>) = G_ADD %4, %6
+# AVX512F-NEXT:     %2(<32 x s16>) = G_MERGE_VALUES %7(<16 x s16>), %8(<16 x s16>)
+# AVX512F-NEXT:     RET 0
+#
+# AVX512BW:          %0(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %1(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %2(<32 x s16>) = G_ADD %0, %1
+# AVX512BW-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<32 x s16>) = IMPLICIT_DEF
+    %1(<32 x s16>) = IMPLICIT_DEF
+    %2(<32 x s16>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v16i32
+# ALL-LABEL: name:  test_add_v16i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<16 x s32>) = G_ADD %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<16 x s32>) = IMPLICIT_DEF
+    %2(<16 x s32>) = G_ADD %0, %1
+    RET 0
+
+...
+---
+name:            test_add_v8i64
+# ALL-LABEL: name:  test_add_v8i64
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<8 x s64>) = G_ADD %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<8 x s64>) = IMPLICIT_DEF
+    %1(<8 x s64>) = IMPLICIT_DEF
+    %2(<8 x s64>) = G_ADD %0, %1
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+
+--- |
+  define void @test_sub_v16i8() {
+    %ret = sub <16 x i8> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v8i16() {
+    %ret = sub <8 x i16> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v4i32() {
+    %ret = sub <4 x i32> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v2i64() {
+    %ret = sub <2 x i64> undef, undef
+    ret void
+  }
+...
+---
+name:            test_sub_v16i8
+# ALL-LABEL: name:  test_sub_v16i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<16 x s8>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<16 x s8>) = G_SUB %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<16 x s8>) = IMPLICIT_DEF
+    %1(<16 x s8>) = IMPLICIT_DEF
+    %2(<16 x s8>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v8i16
+# ALL-LABEL: name:  test_sub_v8i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<8 x s16>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<8 x s16>) = G_SUB %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<8 x s16>) = IMPLICIT_DEF
+    %1(<8 x s16>) = IMPLICIT_DEF
+    %2(<8 x s16>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v4i32
+# ALL-LABEL: name:  test_sub_v4i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<4 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<4 x s32>) = G_SUB %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<4 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = IMPLICIT_DEF
+    %2(<4 x s32>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v2i64
+# ALL-LABEL: name:  test_sub_v2i64
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<2 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<2 x s64>) = G_SUB %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<2 x s64>) = IMPLICIT_DEF
+    %1(<2 x s64>) = IMPLICIT_DEF
+    %2(<2 x s64>) = G_SUB %0, %1
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir
@@ -0,0 +1,120 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+# TODO: add tests for additional configurations once their legalization is supported.
+--- |
+  define void @test_sub_v32i8() {
+    %ret = sub <32 x i8> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v16i16() {
+    %ret = sub <16 x i16> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v8i32() {
+    %ret = sub <8 x i32> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v4i64() {
+    %ret = sub <4 x i64> undef, undef
+    ret void
+  }
+
+...
+---
+name:            test_sub_v32i8
+# ALL-LABEL: name:  test_sub_v32i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX2:          %0(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<32 x s8>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<32 x s8>) = G_SUB %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<32 x s8>) = IMPLICIT_DEF
+    %1(<32 x s8>) = IMPLICIT_DEF
+    %2(<32 x s8>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v16i16
+# ALL-LABEL: name:  test_sub_v16i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX2:          %0(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<16 x s16>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<16 x s16>) = G_SUB %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s16>) = IMPLICIT_DEF
+    %1(<16 x s16>) = IMPLICIT_DEF
+    %2(<16 x s16>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v8i32
+# ALL-LABEL: name:  test_sub_v8i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX2:          %0(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<8 x s32>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<8 x s32>) = G_SUB %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = IMPLICIT_DEF
+    %2(<8 x s32>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v4i64
+# ALL-LABEL: name:  test_sub_v4i64
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX2:          %0(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT:     %1(<4 x s64>) = IMPLICIT_DEF
+# AVX2-NEXT:     %2(<4 x s64>) = G_SUB %0, %1
+# AVX2-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<4 x s64>) = IMPLICIT_DEF
+    %1(<4 x s64>) = IMPLICIT_DEF
+    %2(<4 x s64>) = G_SUB %0, %1
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir
@@ -0,0 +1,120 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512bw -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
+# TODO: add tests for additional configurations once their legalization is supported.
+--- |
+  define void @test_sub_v64i8() {
+    %ret = sub <64 x i8> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v32i16() {
+    %ret = sub <32 x i16> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v16i32() {
+    %ret = sub <16 x i32> undef, undef
+    ret void
+  }
+
+  define void @test_sub_v8i64() {
+    %ret = sub <8 x i64> undef, undef
+    ret void
+  }
+
+...
+---
+name:            test_sub_v64i8
+# ALL-LABEL: name:  test_sub_v64i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX512BW:          %0(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %1(<64 x s8>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %2(<64 x s8>) = G_SUB %0, %1
+# AVX512BW-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<64 x s8>) = IMPLICIT_DEF
+    %1(<64 x s8>) = IMPLICIT_DEF
+    %2(<64 x s8>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v32i16
+# ALL-LABEL: name:  test_sub_v32i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# AVX512BW:          %0(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %1(<32 x s16>) = IMPLICIT_DEF
+# AVX512BW-NEXT:     %2(<32 x s16>) = G_SUB %0, %1
+# AVX512BW-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<32 x s16>) = IMPLICIT_DEF
+    %1(<32 x s16>) = IMPLICIT_DEF
+    %2(<32 x s16>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v16i32
+# ALL-LABEL: name:  test_sub_v16i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<16 x s32>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<16 x s32>) = G_SUB %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<16 x s32>) = IMPLICIT_DEF
+    %2(<16 x s32>) = G_SUB %0, %1
+    RET 0
+
+...
+---
+name:            test_sub_v8i64
+# ALL-LABEL: name:  test_sub_v8i64
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL:          %0(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %1(<8 x s64>) = IMPLICIT_DEF
+# ALL-NEXT:     %2(<8 x s64>) = G_SUB %0, %1
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<8 x s64>) = IMPLICIT_DEF
+    %1(<8 x s64>) = IMPLICIT_DEF
+    %2(<8 x s64>) = G_SUB %0, %1
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
@@ -5,6 +5,15 @@
   define void @test_mul_vec256() {
     ret void
   }
+
+  define void @test_add_vec256() {
+    ret void
+  }
+
+  define void @test_sub_vec256() {
+    ret void
+  }
+
 ...
 ---
 name:            test_mul_vec256
@@ -29,3 +38,49 @@
     RET 0
 
 ...
+---
+name:            test_add_vec256
+alignment:       4
+legalized:       true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+# CHECK-LABEL: name:            test_add_vec256
+# CHECK: registers:
+# CHECK:  - { id: 0, class: vecr }
+# CHECK:  - { id: 1, class: vecr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = G_ADD %0, %0
+    RET 0
+
+...
+---
+name:            test_sub_vec256
+alignment:       4
+legalized:       true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+# CHECK-LABEL: name:            test_sub_vec256
+# CHECK: registers:
+# CHECK:  - { id: 0, class: vecr }
+# CHECK:  - { id: 1, class: vecr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = G_SUB %0, %0
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
@@ -7,6 +7,14 @@
     ret void
   }
 
+  define void @test_add_vec512() {
+    ret void
+  }
+
+  define void @test_sub_vec512() {
+    ret void
+  }
+
 ...
 ---
 name:            test_mul_vec512
@@ -31,3 +39,49 @@
     RET 0
 
 ...
+---
+name:            test_add_vec512
+alignment:       4
+legalized:       true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+# CHECK-LABEL: name:            test_add_vec512
+# CHECK: registers:
+# CHECK:  - { id: 0, class: vecr }
+# CHECK:  - { id: 1, class: vecr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<16 x s32>) = G_ADD %0, %0
+    RET 0
+
+...
+---
+name:            test_sub_vec512
+alignment:       4
+legalized:       true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+# CHECK-LABEL: name:            test_sub_vec512
+# CHECK: registers:
+# CHECK:  - { id: 0, class: vecr }
+# CHECK:  - { id: 1, class: vecr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.1 (%ir-block.0):
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<16 x s32>) = G_SUB %0, %0
+    RET 0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v128.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v128.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v128.mir
@@ -0,0 +1,195 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2                        -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=SSE2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx                         -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=AVX1
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl           -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+  define <16 x i8> @test_add_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+    %ret = add <16 x i8> %arg1, %arg2
+    ret <16 x i8> %ret
+  }
+
+  define <8 x i16> @test_add_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+    %ret = add <8 x i16> %arg1, %arg2
+    ret <8 x i16> %ret
+  }
+
+  define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+    %ret = add <4 x i32> %arg1, %arg2
+    ret <4 x i32> %ret
+  }
+
+  define <2 x i64> @test_add_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) {
+    %ret = add <2 x i64> %arg1, %arg2
+    ret <2 x i64> %ret
+  }
+
+...
+---
+name:            test_add_v16i8
+# ALL-LABEL: name:  test_add_v16i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128 }
+# AVX512VL-NEXT:     - { id: 1, class: vr128 }
+# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PADDBrr %0, %1
+#
+# AVX1:                %2 = VPADDBrr %0, %1
+#
+# AVX512VL:            %2 = VPADDBrr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDBZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<16 x s8>) = COPY %xmm0
+    %1(<16 x s8>) = COPY %xmm1
+    %2(<16 x s8>) = G_ADD %0, %1
+    %xmm0 = COPY %2(<16 x s8>)
+    RET 0, implicit %xmm0
+
+...
+---
+name:            test_add_v8i16
+# ALL-LABEL: name:  test_add_v8i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128 }
+# AVX512VL-NEXT:     - { id: 1, class: vr128 }
+# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PADDWrr %0, %1
+#
+# AVX1:                %2 = VPADDWrr %0, %1
+#
+# AVX512VL:            %2 = VPADDWrr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDWZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<8 x s16>) = COPY %xmm0
+    %1(<8 x s16>) = COPY %xmm1
+    %2(<8 x s16>) = G_ADD %0, %1
+    %xmm0 = COPY %2(<8 x s16>)
+    RET 0, implicit %xmm0
+
+...
+---
+name:            test_add_v4i32
+# ALL-LABEL: name:  test_add_v4i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128x }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PADDDrr %0, %1
+#
+# AVX1:                %2 = VPADDDrr %0, %1
+#
+# AVX512VL:            %2 = VPADDDZ128rr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDDZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<4 x s32>) = COPY %xmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<4 x s32>) = G_ADD %0, %1
+    %xmm0 = COPY %2(<4 x s32>)
+    RET 0, implicit %xmm0
+
+...
+---
+name:            test_add_v2i64
+# ALL-LABEL: name:  test_add_v2i64
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128x }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PADDQrr %0, %1
+#
+# AVX1:                %2 = VPADDQrr %0, %1
+#
+# AVX512VL:            %2 = VPADDQZ128rr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDQZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<2 x s64>) = COPY %xmm0
+    %1(<2 x s64>) = COPY %xmm1
+    %2(<2 x s64>) = G_ADD %0, %1
+    %xmm0 = COPY %2(<2 x s64>)
+    RET 0, implicit %xmm0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v256.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v256.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v256.mir
@@ -0,0 +1,185 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2                        -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl           -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+  define <32 x i8> @test_add_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) {
+    %ret = add <32 x i8> %arg1, %arg2
+    ret <32 x i8> %ret
+  }
+
+  define <16 x i16> @test_add_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) {
+    %ret = add <16 x i16> %arg1, %arg2
+    ret <16 x i16> %ret
+  }
+
+  define <8 x i32> @test_add_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) {
+    %ret = add <8 x i32> %arg1, %arg2
+    ret <8 x i32> %ret
+  }
+
+  define <4 x i64> @test_add_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) {
+    %ret = add <4 x i64> %arg1, %arg2
+    ret <4 x i64> %ret
+  }
+...
+---
+name:            test_add_v32i8
+# ALL-LABEL: name:  test_add_v32i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256 }
+# AVX512VL-NEXT:     - { id: 1, class: vr256 }
+# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPADDBYrr %0, %1
+#
+# AVX512VL:            %2 = VPADDBYrr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDBZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<32 x s8>) = COPY %ymm0
+    %1(<32 x s8>) = COPY %ymm1
+    %2(<32 x s8>) = G_ADD %0, %1
+    %ymm0 = COPY %2(<32 x s8>)
+    RET 0, implicit %ymm0
+
+...
+---
+name:            test_add_v16i16
+# ALL-LABEL: name:  test_add_v16i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256 }
+# AVX512VL-NEXT:     - { id: 1, class: vr256 }
+# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPADDWYrr %0, %1
+#
+# AVX512VL:            %2 = VPADDWYrr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDWZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s16>) = COPY %ymm0
+    %1(<16 x s16>) = COPY %ymm1
+    %2(<16 x s16>) = G_ADD %0, %1
+    %ymm0 = COPY %2(<16 x s16>)
+    RET 0, implicit %ymm0
+
+...
+---
+name:            test_add_v8i32
+# ALL-LABEL: name:  test_add_v8i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256x }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPADDDYrr %0, %1
+#
+# AVX512VL:            %2 = VPADDDZ256rr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDDZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<8 x s32>) = G_ADD %0, %1
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name:            test_add_v4i64
+# ALL-LABEL: name:  test_add_v4i64
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256x }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPADDQYrr %0, %1
+#
+# AVX512VL:            %2 = VPADDQZ256rr %0, %1
+#
+# AVX512BWVL:          %2 = VPADDQZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<4 x s64>) = COPY %ymm0
+    %1(<4 x s64>) = COPY %ymm1
+    %2(<4 x s64>) = G_ADD %0, %1
+    %ymm0 = COPY %2(<4 x s64>)
+    RET 0, implicit %ymm0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v512.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v512.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v512.mir
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define <64 x i8> @test_add_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) #0 {
+    %ret = add <64 x i8> %arg1, %arg2
+    ret <64 x i8> %ret
+  }
+
+  define <32 x i16> @test_add_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) #0 {
+    %ret = add <32 x i16> %arg1, %arg2
+    ret <32 x i16> %ret
+  }
+
+  define <16 x i32> @test_add_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) #1 {
+    %ret = add <16 x i32> %arg1, %arg2
+    ret <16 x i32> %ret
+  }
+
+  define <8 x i64> @test_add_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) #1 {
+    %ret = add <8 x i64> %arg1, %arg2
+    ret <8 x i64> %ret
+  }
+
+  attributes #0 = { "target-features"="+avx512f,+avx512bw" }
+  attributes #1 = { "target-features"="+avx512f" }
+...
+---
+name:            test_add_v64i8
+# ALL-LABEL: name:  test_add_v64i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPADDBZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<64 x s8>) = COPY %zmm0
+    %1(<64 x s8>) = COPY %zmm1
+    %2(<64 x s8>) = G_ADD %0, %1
+    %zmm0 = COPY %2(<64 x s8>)
+    RET 0, implicit %zmm0
+
+...
+---
+name:            test_add_v32i16
+# ALL-LABEL: name:  test_add_v32i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPADDWZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<32 x s16>) = COPY %zmm0
+    %1(<32 x s16>) = COPY %zmm1
+    %2(<32 x s16>) = G_ADD %0, %1
+    %zmm0 = COPY %2(<32 x s16>)
+    RET 0, implicit %zmm0
+
+...
+---
+name:            test_add_v16i32
+# ALL-LABEL: name:  test_add_v16i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPADDDZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<16 x s32>) = COPY %zmm1
+    %2(<16 x s32>) = G_ADD %0, %1
+    %zmm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %zmm0
+
+...
+---
+name:            test_add_v8i64
+# ALL-LABEL: name:  test_add_v8i64
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPADDQZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<8 x s64>) = COPY %zmm0
+    %1(<8 x s64>) = COPY %zmm1
+    %2(<8 x s64>) = G_ADD %0, %1
+    %zmm0 = COPY %2(<8 x s64>)
+    RET 0, implicit %zmm0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
@@ -0,0 +1,195 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2                        -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=SSE2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx                         -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=AVX1
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl           -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+  define <16 x i8> @test_sub_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+    %ret = sub <16 x i8> %arg1, %arg2
+    ret <16 x i8> %ret
+  }
+
+  define <8 x i16> @test_sub_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+    %ret = sub <8 x i16> %arg1, %arg2
+    ret <8 x i16> %ret
+  }
+
+  define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+    %ret = sub <4 x i32> %arg1, %arg2
+    ret <4 x i32> %ret
+  }
+
+  define <2 x i64> @test_sub_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) {
+    %ret = sub <2 x i64> %arg1, %arg2
+    ret <2 x i64> %ret
+  }
+
+...
+---
+name:            test_sub_v16i8
+# ALL-LABEL: name:  test_sub_v16i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128 }
+# AVX512VL-NEXT:     - { id: 1, class: vr128 }
+# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PSUBBrr %0, %1
+#
+# AVX1:                %2 = VPSUBBrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBBrr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBBZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<16 x s8>) = COPY %xmm0
+    %1(<16 x s8>) = COPY %xmm1
+    %2(<16 x s8>) = G_SUB %0, %1
+    %xmm0 = COPY %2(<16 x s8>)
+    RET 0, implicit %xmm0
+
+...
+---
+name:            test_sub_v8i16
+# ALL-LABEL: name:  test_sub_v8i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128 }
+# AVX512VL-NEXT:     - { id: 1, class: vr128 }
+# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PSUBWrr %0, %1
+#
+# AVX1:                %2 = VPSUBWrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBWrr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBWZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<8 x s16>) = COPY %xmm0
+    %1(<8 x s16>) = COPY %xmm1
+    %2(<8 x s16>) = G_SUB %0, %1
+    %xmm0 = COPY %2(<8 x s16>)
+    RET 0, implicit %xmm0
+
+...
+---
+name:            test_sub_v4i32
+# ALL-LABEL: name:  test_sub_v4i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128x }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PSUBDrr %0, %1
+#
+# AVX1:                %2 = VPSUBDrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBDZ128rr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBDZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<4 x s32>) = COPY %xmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<4 x s32>) = G_SUB %0, %1
+    %xmm0 = COPY %2(<4 x s32>)
+    RET 0, implicit %xmm0
+
+...
+---
+name:            test_sub_v2i64
+# ALL-LABEL: name:  test_sub_v2i64
+alignment:       4
+legalized:       true
+regBankSelected: true
+# NOVL:            registers:
+# NOVL-NEXT:         - { id: 0, class: vr128 }
+# NOVL-NEXT:         - { id: 1, class: vr128 }
+# NOVL-NEXT:         - { id: 2, class: vr128 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr128x }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# SSE2:                %2 = PSUBQrr %0, %1
+#
+# AVX1:                %2 = VPSUBQrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBQZ128rr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBQZ128rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %xmm0, %xmm1
+
+    %0(<2 x s64>) = COPY %xmm0
+    %1(<2 x s64>) = COPY %xmm1
+    %2(<2 x s64>) = G_SUB %0, %1
+    %xmm0 = COPY %2(<2 x s64>)
+    RET 0, implicit %xmm0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
@@ -0,0 +1,185 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2                        -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl           -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL
+
+--- |
+  define <32 x i8> @test_sub_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) {
+    %ret = sub <32 x i8> %arg1, %arg2
+    ret <32 x i8> %ret
+  }
+
+  define <16 x i16> @test_sub_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) {
+    %ret = sub <16 x i16> %arg1, %arg2
+    ret <16 x i16> %ret
+  }
+
+  define <8 x i32> @test_sub_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) {
+    %ret = sub <8 x i32> %arg1, %arg2
+    ret <8 x i32> %ret
+  }
+
+  define <4 x i64> @test_sub_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) {
+    %ret = sub <4 x i64> %arg1, %arg2
+    ret <4 x i64> %ret
+  }
+...
+---
+name:            test_sub_v32i8
+# ALL-LABEL: name:  test_sub_v32i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256 }
+# AVX512VL-NEXT:     - { id: 1, class: vr256 }
+# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPSUBBYrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBBYrr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBBZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<32 x s8>) = COPY %ymm0
+    %1(<32 x s8>) = COPY %ymm1
+    %2(<32 x s8>) = G_SUB %0, %1
+    %ymm0 = COPY %2(<32 x s8>)
+    RET 0, implicit %ymm0
+
+...
+---
+name:            test_sub_v16i16
+# ALL-LABEL: name:  test_sub_v16i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256 }
+# AVX512VL-NEXT:     - { id: 1, class: vr256 }
+# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPSUBWYrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBWYrr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBWZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s16>) = COPY %ymm0
+    %1(<16 x s16>) = COPY %ymm1
+    %2(<16 x s16>) = G_SUB %0, %1
+    %ymm0 = COPY %2(<16 x s16>)
+    RET 0, implicit %ymm0
+
+...
+---
+name:            test_sub_v8i32
+# ALL-LABEL: name:  test_sub_v8i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256x }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPSUBDYrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBDZ256rr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBDZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<8 x s32>) = G_SUB %0, %1
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name:            test_sub_v4i64
+# ALL-LABEL: name:  test_sub_v4i64
+alignment:       4
+legalized:       true
+regBankSelected: true
+# AVX2:            registers:
+# AVX2-NEXT:         - { id: 0, class: vr256 }
+# AVX2-NEXT:         - { id: 1, class: vr256 }
+# AVX2-NEXT:         - { id: 2, class: vr256 }
+#
+# AVX512VL:        registers:
+# AVX512VL-NEXT:     - { id: 0, class: vr256x }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+#
+# AVX512BWVL:      registers:
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX2:                %2 = VPSUBQYrr %0, %1
+#
+# AVX512VL:            %2 = VPSUBQZ256rr %0, %1
+#
+# AVX512BWVL:          %2 = VPSUBQZ256rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<4 x s64>) = COPY %ymm0
+    %1(<4 x s64>) = COPY %ymm1
+    %2(<4 x s64>) = G_SUB %0, %1
+    %ymm0 = COPY %2(<4 x s64>)
+    RET 0, implicit %ymm0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define <64 x i8> @test_sub_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) #0 {
+    %ret = sub <64 x i8> %arg1, %arg2
+    ret <64 x i8> %ret
+  }
+
+  define <32 x i16> @test_sub_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) #0 {
+    %ret = sub <32 x i16> %arg1, %arg2
+    ret <32 x i16> %ret
+  }
+
+  define <16 x i32> @test_sub_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) #1 {
+    %ret = sub <16 x i32> %arg1, %arg2
+    ret <16 x i32> %ret
+  }
+
+  define <8 x i64> @test_sub_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) #1 {
+    %ret = sub <8 x i64> %arg1, %arg2
+    ret <8 x i64> %ret
+  }
+
+  attributes #0 = { "target-features"="+avx512f,+avx512bw" }
+  attributes #1 = { "target-features"="+avx512f" }
+...
+---
+name:            test_sub_v64i8
+# ALL-LABEL: name:  test_sub_v64i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPSUBBZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<64 x s8>) = COPY %zmm0
+    %1(<64 x s8>) = COPY %zmm1
+    %2(<64 x s8>) = G_SUB %0, %1
+    %zmm0 = COPY %2(<64 x s8>)
+    RET 0, implicit %zmm0
+
+...
+---
+name:            test_sub_v32i16
+# ALL-LABEL: name:  test_sub_v32i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPSUBWZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<32 x s16>) = COPY %zmm0
+    %1(<32 x s16>) = COPY %zmm1
+    %2(<32 x s16>) = G_SUB %0, %1
+    %zmm0 = COPY %2(<32 x s16>)
+    RET 0, implicit %zmm0
+
+...
+---
+name:            test_sub_v16i32
+# ALL-LABEL: name:  test_sub_v16i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPSUBDZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<16 x s32>) = COPY %zmm1
+    %2(<16 x s32>) = G_SUB %0, %1
+    %zmm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %zmm0
+
+...
+---
+name:            test_sub_v8i64
+# ALL-LABEL: name:  test_sub_v8i64
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: vr512 }
+# ALL-NEXT:   - { id: 1, class: vr512 }
+# ALL-NEXT:   - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL:          %2 = VPSUBQZrr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %zmm1
+
+    %0(<8 x s64>) = COPY %zmm0
+    %1(<8 x s64>) = COPY %zmm1
+    %2(<8 x s64>) = G_SUB %0, %1
+    %zmm0 = COPY %2(<8 x s64>)
+    RET 0, implicit %zmm0
+
+...
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/sub-vec.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/sub-vec.ll
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/sub-vec.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel < %s -o - | FileCheck %s --check-prefix=SKX
+
+define <16 x i8> @test_sub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; SKX-LABEL: test_sub_v16i8:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %diff = sub <16 x i8> %lhs, %rhs ; 16 byte lanes; the expected lowering is a single vpsubb on xmm registers
+  ret <16 x i8> %diff
+}
+
+define <8 x i16> @test_sub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; SKX-LABEL: test_sub_v8i16:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %diff = sub <8 x i16> %lhs, %rhs ; 8 word lanes; the expected lowering is a single vpsubw on xmm registers
+  ret <8 x i16> %diff
+}
+
+define <4 x i32> @test_sub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; SKX-LABEL: test_sub_v4i32:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %diff = sub <4 x i32> %lhs, %rhs ; 4 dword lanes; the expected lowering is a single vpsubd on xmm registers
+  ret <4 x i32> %diff
+}
+
+define <2 x i64> @test_sub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; SKX-LABEL: test_sub_v2i64:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %diff = sub <2 x i64> %lhs, %rhs ; 2 qword lanes; the expected lowering is a single vpsubq on xmm registers
+  ret <2 x i64> %diff
+}
+
+define <32 x i8> @test_sub_v32i8(<32 x i8> %lhs, <32 x i8> %rhs) {
+; SKX-LABEL: test_sub_v32i8:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %diff = sub <32 x i8> %lhs, %rhs ; 32 byte lanes; the expected lowering is a single vpsubb on ymm registers
+  ret <32 x i8> %diff
+}
+
+define <16 x i16> @test_sub_v16i16(<16 x i16> %lhs, <16 x i16> %rhs) {
+; SKX-LABEL: test_sub_v16i16:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %diff = sub <16 x i16> %lhs, %rhs ; 16 word lanes; the expected lowering is a single vpsubw on ymm registers
+  ret <16 x i16> %diff
+}
+
+define <8 x i32> @test_sub_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
+; SKX-LABEL: test_sub_v8i32:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %diff = sub <8 x i32> %lhs, %rhs ; 8 dword lanes; the expected lowering is a single vpsubd on ymm registers
+  ret <8 x i32> %diff
+}
+
+define <4 x i64> @test_sub_v4i64(<4 x i64> %lhs, <4 x i64> %rhs) {
+; SKX-LABEL: test_sub_v4i64:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %diff = sub <4 x i64> %lhs, %rhs ; 4 qword lanes; the expected lowering is a single vpsubq on ymm registers
+  ret <4 x i64> %diff
+}
+
+define <64 x i8> @test_sub_v64i8(<64 x i8> %lhs, <64 x i8> %rhs) {
+; SKX-LABEL: test_sub_v64i8:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %diff = sub <64 x i8> %lhs, %rhs ; 64 byte lanes; the expected lowering is a single vpsubb on zmm registers
+  ret <64 x i8> %diff
+}
+
+define <32 x i16> @test_sub_v32i16(<32 x i16> %lhs, <32 x i16> %rhs) {
+; SKX-LABEL: test_sub_v32i16:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %diff = sub <32 x i16> %lhs, %rhs ; 32 word lanes; the expected lowering is a single vpsubw on zmm registers
+  ret <32 x i16> %diff
+}
+
+define <16 x i32> @test_sub_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
+; SKX-LABEL: test_sub_v16i32:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %diff = sub <16 x i32> %lhs, %rhs ; 16 dword lanes; the expected lowering is a single vpsubd on zmm registers
+  ret <16 x i32> %diff
+}
+
+define <8 x i64> @test_sub_v8i64(<8 x i64> %lhs, <8 x i64> %rhs) {
+; SKX-LABEL: test_sub_v8i64:
+; SKX:       # BB#0:
+; SKX-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    retq
+  %diff = sub <8 x i64> %lhs, %rhs ; 8 qword lanes; the expected lowering is a single vpsubq on zmm registers
+  ret <8 x i64> %diff
+}
+