Index: lib/Target/X86/X86InstructionSelector.cpp
===================================================================
--- lib/Target/X86/X86InstructionSelector.cpp
+++ lib/Target/X86/X86InstructionSelector.cpp
@@ -75,6 +75,12 @@
   bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
   bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
+                    MachineFunction &MF) const;
+
+  // Emit an insert-subreg copy and insert it before MachineInstr &I.
+  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
+                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
 
   const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
   const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
@@ -259,6 +265,8 @@
     return true;
   if (selectUadde(I, MRI, MF))
     return true;
+  if (selectInsert(I, MRI, MF))
+    return true;
 
   return false;
 }
@@ -665,6 +673,105 @@
   return true;
 }
 
+bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
+                                              MachineInstr &I,
+                                              MachineRegisterInfo &MRI,
+                                              MachineFunction &MF) const {
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+  unsigned SubIdx = X86::NoSubRegister;
+
+  // TODO: support scalar types
+  if (DstTy.isVector() && SrcTy.isVector()) {
+    assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
+           "Incorrect Src/Dst register size");
+
+    if (SrcTy.getSizeInBits() == 128)
+      SubIdx = X86::sub_xmm;
+    else if (SrcTy.getSizeInBits() == 256)
+      SubIdx = X86::sub_ymm;
+    else
+      return false;
+  } else
+    return false;
+
+  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
+  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
+
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+    DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
+    return false;
+  }
+
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
+      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
+      .addReg(SrcReg);
+
+  return true;
+}
+
+bool X86InstructionSelector::selectInsert(MachineInstr &I,
+                                          MachineRegisterInfo &MRI,
+                                          MachineFunction &MF) const {
+
+  if (I.getOpcode() != TargetOpcode::G_INSERT)
+    return false;
+
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+  const unsigned InsertReg = I.getOperand(2).getReg();
+  int64_t Index = I.getOperand(3).getImm();
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT InsertRegTy = MRI.getType(InsertReg);
+
+  // For now, handle vector types only.
+  if (!DstTy.isVector())
+    return false;
+
+  if (Index % InsertRegTy.getSizeInBits() != 0)
+    return false; // Not insert subvector.
+
+  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
+    // Replace by subreg copy.
+    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
+      return false;
+
+    I.eraseFromParent();
+    return true;
+  }
+
+  bool HasAVX = STI.hasAVX();
+  bool HasAVX512 = STI.hasAVX512();
+  bool HasVLX = STI.hasVLX();
+
+  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
+    if (HasVLX)
+      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
+    else if (HasAVX)
+      I.setDesc(TII.get(X86::VINSERTF128rr));
+    else
+      return false;
+  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
+    if (InsertRegTy.getSizeInBits() == 128)
+      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
+    else if (InsertRegTy.getSizeInBits() == 256)
+      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
+    else
+      return false;
+  } else
+    return false;
+
+  // Convert to X86 VINSERT immediate.
+  Index = Index / InsertRegTy.getSizeInBits();
+
+  I.getOperand(3).setImm(Index);
+
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
 InstructionSelector *
 llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                    X86Subtarget &Subtarget,
Index: lib/Target/X86/X86LegalizerInfo.cpp
===================================================================
--- lib/Target/X86/X86LegalizerInfo.cpp
+++ lib/Target/X86/X86LegalizerInfo.cpp
@@ -214,12 +214,24 @@
   if (!Subtarget.hasAVX())
     return;
 
+  const LLT v16s8 = LLT::vector(16, 8);
+  const LLT v8s16 = LLT::vector(8, 16);
+  const LLT v4s32 = LLT::vector(4, 32);
+  const LLT v2s64 = LLT::vector(2, 64);
+
+  const LLT v32s8 = LLT::vector(32, 8);
+  const LLT v16s16 = LLT::vector(16, 16);
   const LLT v8s32 = LLT::vector(8, 32);
   const LLT v4s64 = LLT::vector(4, 64);
 
   for (unsigned MemOp : {G_LOAD, G_STORE})
     for (auto Ty : {v8s32, v4s64})
       setAction({MemOp, Ty}, Legal);
+
+  for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+    setAction({G_INSERT, Ty}, Legal);
+  for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
+    setAction({G_INSERT, 1, Ty}, Legal);
 }
 
 void X86LegalizerInfo::setLegalizerInfoAVX2() {
@@ -243,6 +255,18 @@
   if (!Subtarget.hasAVX512())
     return;
 
+  const LLT v16s8 = LLT::vector(16, 8);
+  const LLT v8s16 = LLT::vector(8, 16);
+  const LLT v4s32 = LLT::vector(4, 32);
+  const LLT v2s64 = LLT::vector(2, 64);
+
+  const LLT v32s8 = LLT::vector(32, 8);
+  const LLT v16s16 = LLT::vector(16, 16);
+  const LLT v8s32 = LLT::vector(8, 32);
+  const LLT v4s64 = LLT::vector(4, 64);
+
+  const LLT v64s8 = LLT::vector(64, 8);
+  const LLT v32s16 = LLT::vector(32, 16);
   const LLT v16s32 = LLT::vector(16, 32);
   const LLT v8s64 = LLT::vector(8, 64);
 
@@ -256,13 +280,15 @@
   for (auto Ty : {v16s32, v8s64})
     setAction({MemOp, Ty}, Legal);
 
+  for (auto Ty : {v64s8, v32s16, v16s32, v8s64})
+    setAction({G_INSERT, Ty}, Legal);
+  for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64})
+    setAction({G_INSERT, 1, Ty}, Legal);
+
   /************ VLX *******************/
   if (!Subtarget.hasVLX())
     return;
 
-  const LLT v4s32 = LLT::vector(4, 32);
-  const LLT v8s32 = LLT::vector(8, 32);
-
   for (auto Ty : {v4s32, v8s32})
     setAction({G_MUL, Ty}, Legal);
 }
Index: test/CodeGen/X86/GlobalISel/legalize-insert-vec256.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/legalize-insert-vec256.mir
@@ -0,0 +1,33 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+--- |
+  define void @test_insert_128() {
+    ret void
+  }
+...
+---
+name: test_insert_128
+# ALL-LABEL: name: test_insert_128
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL: %0(<8 x s32>) = COPY %ymm0
+# ALL-NEXT: %1(<4 x s32>) = COPY %xmm1
+# ALL-NEXT: %2(<8 x s32>) = G_INSERT %0, %1(<4 x s32>), 0
+# ALL-NEXT: %ymm0 = COPY %2(<8 x s32>)
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
Index: test/CodeGen/X86/GlobalISel/legalize-insert-vec512.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/legalize-insert-vec512.mir
@@ -0,0 +1,63 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define void @test_insert_128() {
+    ret void
+  }
+
+  define void @test_insert_256() {
+    ret void
+  }
+...
+---
+name: test_insert_128
+# ALL-LABEL: name: test_insert_128
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL: %0(<16 x s32>) = COPY %zmm0
+# ALL-NEXT: %1(<4 x s32>) = COPY %xmm1
+# ALL-NEXT: %2(<16 x s32>) = G_INSERT %0, %1(<4 x s32>), 0
+# ALL-NEXT: %ymm0 = COPY %2(<16 x s32>)
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_256
+# ALL-LABEL: name: test_insert_256
+alignment: 4
+legalized: false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# ALL: %0(<16 x s32>) = COPY %zmm0
+# ALL-NEXT: %1(<8 x s32>) = COPY %ymm1
+# ALL-NEXT: %2(<16 x s32>) = G_INSERT %0, %1(<8 x s32>), 0
+# ALL-NEXT: %ymm0 = COPY %2(<16 x s32>)
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
Index: test/CodeGen/X86/GlobalISel/select-insert-vec256.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/select-insert-vec256.mir
@@ -0,0 +1,176 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+--- |
+  define void @test_insert_128_idx0() {
+    ret void
+  }
+
+  define void @test_insert_128_idx0_undef() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1_undef() {
+    ret void
+  }
+
+...
+---
+name: test_insert_128_idx0
+# ALL-LABEL: name: test_insert_128_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256 }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX: %0 = COPY %ymm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VINSERTF128rr %0, %1, 0
+# AVX-NEXT: %ymm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %ymm0
+#
+# AVX512VL: %0 = COPY %ymm0
+# AVX512VL-NEXT: %1 = COPY %xmm1
+# AVX512VL-NEXT: %2 = VINSERTF32x4Z256rr %0, %1, 0
+# AVX512VL-NEXT: %ymm0 = COPY %2
+# AVX512VL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx0_undef
+# ALL-LABEL: name: test_insert_128_idx0_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vecr }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vecr }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %1 = COPY %xmm1
+# ALL-NEXT: undef %2.sub_xmm = COPY %1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx1
+# ALL-LABEL: name: test_insert_128_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256 }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX: %0 = COPY %ymm0
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VINSERTF128rr %0, %1, 1
+# AVX-NEXT: %ymm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %ymm0
+#
+# AVX512VL: %0 = COPY %ymm0
+# AVX512VL-NEXT: %1 = COPY %xmm1
+# AVX512VL-NEXT: %2 = VINSERTF32x4Z256rr %0, %1, 1
+# AVX512VL-NEXT: %ymm0 = COPY %2
+# AVX512VL-NEXT: RET 0, implicit %ymm0
body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = COPY %ymm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_128_idx1_undef
+# ALL-LABEL: name: test_insert_128_idx1_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256 }
+# AVX-NEXT: - { id: 1, class: vr128 }
+# AVX-NEXT: - { id: 2, class: vr256 }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x }
+# AVX512VL-NEXT: - { id: 1, class: vr128x }
+# AVX512VL-NEXT: - { id: 2, class: vr256x }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# AVX: %0 = IMPLICIT_DEF
+# AVX-NEXT: %1 = COPY %xmm1
+# AVX-NEXT: %2 = VINSERTF128rr %0, %1, 1
+# AVX-NEXT: %ymm0 = COPY %2
+# AVX-NEXT: RET 0, implicit %ymm0
+#
+# AVX512VL: %0 = IMPLICIT_DEF
+# AVX512VL-NEXT: %1 = COPY %xmm1
+# AVX512VL-NEXT: %2 = VINSERTF32x4Z256rr %0, %1, 1
+# AVX512VL-NEXT: %ymm0 = COPY %2
+# AVX512VL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<8 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<8 x s32>) = G_INSERT %0(<8 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<8 x s32>)
+    RET 0, implicit %ymm0
+...
+
Index: test/CodeGen/X86/GlobalISel/select-insert-vec512.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/select-insert-vec512.mir
@@ -0,0 +1,271 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define void @test_insert_128_idx0() {
+    ret void
+  }
+
+  define void @test_insert_128_idx0_undef() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1() {
+    ret void
+  }
+
+  define void @test_insert_128_idx1_undef() {
+    ret void
+  }
+
+  define void @test_insert_256_idx0() {
+    ret void
+  }
+
+  define void @test_insert_256_idx0_undef() {
+    ret void
+  }
+
+  define void @test_insert_256_idx1() {
+    ret void
+  }
+
+  define void @test_insert_256_idx1_undef() {
+    ret void
+  }
+
+...
+---
+name: test_insert_128_idx0
+# ALL-LABEL: name: test_insert_128_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# ALL-NEXT: %2 = VINSERTF32x4Zrr %0, %1, 0
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx0_undef
+# ALL-LABEL: name: test_insert_128_idx0_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vecr }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %1 = COPY %xmm1
+# ALL-NEXT: undef %2.sub_xmm = COPY %1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_128_idx1
+# ALL-LABEL: name: test_insert_128_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %xmm1
+# ALL-NEXT: %2 = VINSERTF32x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_128_idx1_undef
+# ALL-LABEL: name: test_insert_128_idx1_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr128x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = IMPLICIT_DEF
+# ALL-NEXT: %1 = COPY %xmm1
+# ALL-NEXT: %2 = VINSERTF32x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<4 x s32>) = COPY %xmm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<4 x s32>), 128
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_256_idx0
+# ALL-LABEL: name: test_insert_256_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %ymm1
+# ALL-NEXT: %2 = VINSERTF64x4Zrr %0, %1, 0
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_256_idx0_undef
+# ALL-LABEL: name: test_insert_256_idx0_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vecr }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %1 = COPY %ymm1
+# ALL-NEXT: undef %2.sub_ymm = COPY %1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 0
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+name: test_insert_256_idx1
+# ALL-LABEL: name: test_insert_256_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = COPY %zmm0
+# ALL-NEXT: %1 = COPY %ymm1
+# ALL-NEXT: %2 = VINSERTF64x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm0, %ymm1
+
+    %0(<16 x s32>) = COPY %zmm0
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 256
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...
+---
+name: test_insert_256_idx1_undef
+# ALL-LABEL: name: test_insert_256_idx1_undef
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512 }
+# ALL-NEXT: - { id: 1, class: vr256x }
+# ALL-NEXT: - { id: 2, class: vr512 }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+  - { id: 2, class: vecr }
+# ALL: %0 = IMPLICIT_DEF
+# ALL-NEXT: %1 = COPY %ymm1
+# ALL-NEXT: %2 = VINSERTF64x4Zrr %0, %1, 1
+# ALL-NEXT: %ymm0 = COPY %2
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm0, %ymm1
+
+    %0(<16 x s32>) = IMPLICIT_DEF
+    %1(<8 x s32>) = COPY %ymm1
+    %2(<16 x s32>) = G_INSERT %0(<16 x s32>), %1(<8 x s32>), 256
+    %ymm0 = COPY %2(<16 x s32>)
+    RET 0, implicit %ymm0
+...