Index: lib/Target/X86/X86InstructionSelector.cpp
===================================================================
--- lib/Target/X86/X86InstructionSelector.cpp
+++ lib/Target/X86/X86InstructionSelector.cpp
@@ -75,6 +75,12 @@
   bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
   bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
+                    MachineFunction &MF) const;
+
+  // Emit an insert-subreg copy and insert it before MachineInstr &I.
+  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
+                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
 
   const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
   const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
@@ -259,6 +265,8 @@
     return true;
   if (selectUadde(I, MRI, MF))
     return true;
+  if (selectInsert(I, MRI, MF))
+    return true;
 
   return false;
 }
@@ -665,6 +673,105 @@
   return true;
 }
 
+bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
+                                              MachineInstr &I,
+                                              MachineRegisterInfo &MRI,
+                                              MachineFunction &MF) const {
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+  unsigned SubIdx = X86::NoSubRegister;
+
+  // TODO: support scalar types
+  if (DstTy.isVector() && SrcTy.isVector()) {
+    assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
+           "Incorrect Src/Dst register size");
+
+    if (SrcTy.getSizeInBits() == 128)
+      SubIdx = X86::sub_xmm;
+    else if (SrcTy.getSizeInBits() == 256)
+      SubIdx = X86::sub_ymm;
+    else
+      return false;
+  } else
+    return false;
+
+  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
+  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
+
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+    DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
+    return false;
+  }
+
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
+      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
+      .addReg(SrcReg);
+
+  return true;
+}
+
+bool X86InstructionSelector::selectInsert(MachineInstr &I,
+                                          MachineRegisterInfo &MRI,
+                                          MachineFunction &MF) const {
+
+  if (I.getOpcode() != TargetOpcode::G_INSERT)
+    return false;
+
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+  const unsigned InsertReg = I.getOperand(2).getReg();
+  int64_t Index = I.getOperand(3).getImm();
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT InsertRegTy = MRI.getType(InsertReg);
+
+  // For now, handle vector types only.
+  if (!DstTy.isVector())
+    return false;
+
+  if (Index % InsertRegTy.getSizeInBits() != 0)
+    return false; // Not insert subvector.
+
+  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
+    // Replace by subreg copy.
+    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
+      return false;
+
+    I.eraseFromParent();
+    return true;
+  }
+
+  bool HasAVX = STI.hasAVX();
+  bool HasAVX512 = STI.hasAVX512();
+  bool HasVLX = STI.hasVLX();
+
+  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
+    if (HasVLX)
+      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
+    else if (HasAVX)
+      I.setDesc(TII.get(X86::VINSERTF128rr));
+    else
+      return false;
+  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
+    if (InsertRegTy.getSizeInBits() == 128)
+      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
+    else if (InsertRegTy.getSizeInBits() == 256)
+      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
+    else
+      return false;
+  } else
+    return false;
+
+  // Convert to X86 VINSERT immediate.
+  Index = Index / InsertRegTy.getSizeInBits();
+
+  I.getOperand(3).setImm(Index);
+
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
 InstructionSelector *
 llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                    X86Subtarget &Subtarget,
Index: lib/Target/X86/X86LegalizerInfo.cpp
===================================================================
--- lib/Target/X86/X86LegalizerInfo.cpp
+++ lib/Target/X86/X86LegalizerInfo.cpp
@@ -214,12 +214,24 @@
   if (!Subtarget.hasAVX())
     return;
 
+  const LLT v16s8 = LLT::vector(16, 8);
+  const LLT v8s16 = LLT::vector(8, 16);
+  const LLT v4s32 = LLT::vector(4, 32);
+  const LLT v2s64 = LLT::vector(2, 64);
+
+  const LLT v32s8 = LLT::vector(32, 8);
+  const LLT v16s16 = LLT::vector(16, 16);
   const LLT v8s32 = LLT::vector(8, 32);
   const LLT v4s64 = LLT::vector(4, 64);
 
   for (unsigned MemOp : {G_LOAD, G_STORE})
     for (auto Ty : {v8s32, v4s64})
       setAction({MemOp, Ty}, Legal);
+
+  for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+    setAction({G_INSERT, Ty}, Legal);
+  for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
+    setAction({G_INSERT, 1, Ty}, Legal);
 }
 
 void X86LegalizerInfo::setLegalizerInfoAVX2() {
@@ -243,6 +255,18 @@
   if (!Subtarget.hasAVX512())
     return;
 
+  const LLT v16s8 = LLT::vector(16, 8);
+  const LLT v8s16 = LLT::vector(8, 16);
+  const LLT v4s32 = LLT::vector(4, 32);
+  const LLT v2s64 = LLT::vector(2, 64);
+
+  const LLT v32s8 = LLT::vector(32, 8);
+  const LLT v16s16 = LLT::vector(16, 16);
+  const LLT v8s32 = LLT::vector(8, 32);
+  const LLT v4s64 = LLT::vector(4, 64);
+
+  const LLT v64s8 = LLT::vector(64, 8);
+  const LLT v32s16 = LLT::vector(32, 16);
   const LLT v16s32 = LLT::vector(16, 32);
   const LLT v8s64 = LLT::vector(8, 64);
 
@@ -256,13 +280,15 @@
   for (auto Ty : {v16s32, v8s64})
     setAction({MemOp, Ty}, Legal);
 
+  for (auto Ty : {v64s8, v32s16, v16s32, v8s64})
+    setAction({G_INSERT, Ty}, Legal);
+  for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64})
+    setAction({G_INSERT, 1, Ty}, Legal);
+
   /************ VLX *******************/
   if (!Subtarget.hasVLX())
     return;
 
-  const LLT v4s32 = LLT::vector(4, 32);
-  const LLT v8s32 = LLT::vector(8, 32);
-
   for (auto Ty : {v4s32, v8s32})
     setAction({G_MUL, Ty}, Legal);
 }
Index: test/CodeGen/X86/GlobalISel/legalize-insert-vec256.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/legalize-insert-vec256.mir
@@ -0,0 +1,33 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+--- |
+  define void @test_insert_128() {
+    ret void
+  }
+...
+---
+name: test_insert_128
+# ALL-LABEL: name: test_insert_128
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL: %0(<8 x s32>) = COPY %ymm0
+# ALL-NEXT: %1(<4 x s32>) = COPY %xmm1
+# ALL-NEXT: %2(<8 x s32>) = G_INSERT %0, %1(<4 x s32>), 0
+# ALL-NEXT: %ymm0 = COPY %2(<8 x s32>)
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
Index: test/CodeGen/X86/GlobalISel/legalize-insert-vec512.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/legalize-insert-vec512.mir
@@ -0,0 +1,63 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define void @test_insert_128() {
+    ret void
+  }
+
+  define void @test_insert_256() {
+    ret void
+  }
+...
+---
+name: test_insert_128
+# ALL-LABEL: name: test_insert_128
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL: %0(<16 x s32>) = COPY %zmm0
+# ALL-NEXT: %1(<4 x s32>) = COPY %xmm1
+# ALL-NEXT: %2(<16 x s32>) = G_INSERT %0, %1(<4 x s32>), 0
+# ALL-NEXT: %ymm0 = COPY %2(<16 x s32>)
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_256
+# ALL-LABEL: name: test_insert_256
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL: %0(<16 x s32>) = COPY %zmm0
+# ALL-NEXT: %1(<8 x s32>) = COPY %ymm1
+# ALL-NEXT: %2(<16 x s32>) = G_INSERT %0, %1(<8 x s32>), 0
+# ALL-NEXT: %ymm0 = COPY %2(<16 x s32>)
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
Index: test/CodeGen/X86/GlobalISel/select-insert-vec256.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/select-insert-vec256.mir
@@ -0,0 +1,176 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+--- |
+  define void @test_insert_128_idx0() {
+    ret void
+  }
+
+  define void @test_insert_128_idx0_undef() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1_undef() {
+    ret void
+  }
+
+...
+---
+name: test_insert_128_idx0
+# ALL-LABEL: name: test_insert_128_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256 }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX: %0 = COPY %ymm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VINSERTF128rr %0, %1, 0
+# AVX-NEXT: %ymm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %ymm0
+#
+# AVX512VL: %0 = COPY %ymm0
+# AVX512VL-NEXT: %1 = COPY %xmm1
+# AVX512VL-NEXT: %2 = VINSERTF32x4Z256rr %0, %1, 0
+# AVX512VL-NEXT: %ymm0 = COPY %2
+# AVX512VL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx0_undef
+# ALL-LABEL: name: test_insert_128_idx0_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vecr }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vecr }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %1 = COPY %xmm1
+# ALL-NEXT: undef %2.sub_xmm = COPY %1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx1
+# ALL-LABEL: name: test_insert_128_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256 }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX: %0 = COPY %ymm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VINSERTF128rr %0, %1, 1
+# AVX-NEXT: %ymm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %ymm0
+#
+# AVX512VL: %0 = COPY %ymm0
+# AVX512VL-NEXT: %1 = COPY %xmm1
+# AVX512VL-NEXT: %2 = VINSERTF32x4Z256rr %0, %1, 1
+# AVX512VL-NEXT: %ymm0 = COPY %2
+# AVX512VL-NEXT: RET 0, implicit %ymm0
body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_128_idx1_undef
+# ALL-LABEL: name: test_insert_128_idx1_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256 }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX: %0 = IMPLICIT_DEF
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VINSERTF128rr %0, %1, 1
+# AVX-NEXT: %ymm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %ymm0
+#
+# AVX512VL: %0 = IMPLICIT_DEF
+# AVX512VL-NEXT: %1 = COPY %xmm1
+# AVX512VL-NEXT: %2 = VINSERTF32x4Z256rr %0, %1, 1
+# AVX512VL-NEXT: %ymm0 = COPY %2
+# AVX512VL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+...
+
Index: test/CodeGen/X86/GlobalISel/select-insert-vec512.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/select-insert-vec512.mir
@@ -0,0 +1,271 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define void @test_insert_128_idx0() {
+    ret void
+  }
+
+  define void @test_insert_128_idx0_undef() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1_undef() {
+    ret void
+  }
+
+  define void @test_insert_256_idx0() {
+    ret void
+  }
+
+  define void @test_insert_256_idx0_undef() {
+    ret void
+  }
+
+  define void @test_insert_256_idx1() {
+    ret void
+  }
+
+  define void @test_insert_256_idx1_undef() {
+    ret void
+  }
+
+...
+---
+name: test_insert_128_idx0
+# ALL-LABEL: name: test_insert_128_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# ALL-NEXT: %2 = VINSERTF32x4Zrr %0, %1, 0
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx0_undef
+# ALL-LABEL: name: test_insert_128_idx0_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vecr }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %1 = COPY %xmm1
+# ALL-NEXT: undef %2.sub_xmm = COPY %1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx1
+# ALL-LABEL: name: test_insert_128_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# ALL-NEXT: %2 = VINSERTF32x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_128_idx1_undef
+# ALL-LABEL: name: test_insert_128_idx1_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = IMPLICIT_DEF
+# ALL-NEXT: %1 = COPY %xmm1
+# ALL-NEXT: %2 = VINSERTF32x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_256_idx0
+# ALL-LABEL: name: test_insert_256_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %ymm1
+# ALL-NEXT: %2 = VINSERTF64x4Zrr %0, %1, 0
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_256_idx0_undef
+# ALL-LABEL: name: test_insert_256_idx0_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vecr }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %1 = COPY %ymm1
+# ALL-NEXT: undef %2.sub_ymm = COPY %1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_256_idx1
+# ALL-LABEL: name: test_insert_256_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %ymm1
+# ALL-NEXT: %2 = VINSERTF64x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 256
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_256_idx1_undef
+# ALL-LABEL: name: test_insert_256_idx1_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = IMPLICIT_DEF
+# ALL-NEXT: %1 = COPY %ymm1
+# ALL-NEXT: %2 = VINSERTF64x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 256
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...