Index: lib/Target/X86/X86InstructionSelector.cpp
===================================================================
--- lib/Target/X86/X86InstructionSelector.cpp
+++ lib/Target/X86/X86InstructionSelector.cpp
@@ -75,6 +75,8 @@
   bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
   bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
+                         MachineFunction &MF) const;
   bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
   bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -270,6 +272,8 @@
     return true;
   if (selectUadde(I, MRI, MF))
     return true;
+  if (selectMergeValues(I, MRI, MF))
+    return true;
   if (selectExtract(I, MRI, MF))
     return true;
   if (selectInsert(I, MRI, MF))
@@ -876,6 +880,62 @@
   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 }
 
+// Select a G_MERGE_VALUES by rebuilding the wide value piece by piece.
+//
+// Source 0 is placed into a fresh destination-typed register through
+// emitInsertSubreg. Each later source is added with a G_INSERT whose bit
+// offset is (operand index - 1) * size of the first source's type, and every
+// G_INSERT is passed back to select() as soon as it is built. A final COPY
+// into the original destination is selected the same way, then I is erased.
+//
+// Returns false if I is not a G_MERGE_VALUES or if any of those steps fails;
+// returns true once I has been replaced.
+bool X86InstructionSelector::selectMergeValues(MachineInstr &I,
+                                               MachineRegisterInfo &MRI,
+                                               MachineFunction &MF) const {
+  if (I.getOpcode() != TargetOpcode::G_MERGE_VALUES)
+    return false;
+
+  const unsigned ResultReg = I.getOperand(0).getReg();
+  const unsigned FirstPartReg = I.getOperand(1).getReg();
+  const LLT ResultTy = MRI.getType(ResultReg);
+  // Every insert offset below is a multiple of the first source's width.
+  const unsigned PartBits = MRI.getType(FirstPartReg).getSizeInBits();
+  const RegisterBank &Bank = *RBI.getRegBank(ResultReg, MRI, TRI);
+
+  // Source 0 goes in via emitInsertSubreg instead of a G_INSERT.
+  unsigned AccReg = MRI.createGenericVirtualRegister(ResultTy);
+  MRI.setRegBank(AccReg, Bank);
+  if (!emitInsertSubreg(AccReg, FirstPartReg, I, MRI, MF))
+    return false;
+
+  // Operand 0 is the result, so the remaining sources start at operand 2.
+  for (unsigned OpIdx = 2; OpIdx < I.getNumOperands(); ++OpIdx) {
+    const unsigned NextReg = MRI.createGenericVirtualRegister(ResultTy);
+    MRI.setRegBank(NextReg, Bank);
+
+    const unsigned BitOffset = (OpIdx - 1) * PartBits;
+    MachineInstr &Insert = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                                    TII.get(TargetOpcode::G_INSERT), NextReg)
+                                .addReg(AccReg)
+                                .addReg(I.getOperand(OpIdx).getReg())
+                                .addImm(BitOffset);
+    if (!select(Insert))
+      return false;
+
+    // The next insert (or the final COPY) reads the value built so far.
+    AccReg = NextReg;
+  }
+
+  MachineInstr &ResultCopy = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                                      TII.get(TargetOpcode::COPY), ResultReg)
+                                  .addReg(AccReg);
+  if (!select(ResultCopy))
+    return false;
+
+  I.eraseFromParent();
+  return true;
+}
 InstructionSelector *
 llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                    X86Subtarget &Subtarget,
Index: test/CodeGen/X86/GlobalISel/select-merge-vec256.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/GlobalISel/select-merge-vec256.mir
@@ -0,0 +1,52 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=AVX512VL
+--- |
+  define void @test_merge() {
+    ret void
+  }
+...
+--- +name: test_merge +# AVX-LABEL: name: test_merge +# +# AVX512VL-LABEL: name: test_merge +alignment: 4 +legalized: true +regBankSelected: true +# AVX: registers: +# AVX-NEXT: - { id: 0, class: vr128 } +# AVX-NEXT: - { id: 1, class: vr256 } +# AVX-NEXT: - { id: 2, class: vr256 } +# AVX-NEXT: - { id: 3, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr256x } +# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 3, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } +# AVX: %0 = IMPLICIT_DEF +# AVX-NEXT: undef %2.sub_xmm = COPY %0 +# AVX-NEXT: %3 = VINSERTF128rr %2, %0, 1 +# AVX-NEXT: %1 = COPY %3 +# AVX-NEXT: %ymm0 = COPY %1 +# AVX-NEXT: RET 0, implicit %ymm0 +# +# AVX512VL: %0 = IMPLICIT_DEF +# AVX512VL-NEXT: undef %2.sub_xmm = COPY %0 +# AVX512VL-NEXT: %3 = VINSERTF32x4Z256rr %2, %0, 1 +# AVX512VL-NEXT: %1 = COPY %3 +# AVX512VL-NEXT: %ymm0 = COPY %1 +# AVX512VL-NEXT: RET 0, implicit %ymm0 +body: | + bb.1 (%ir-block.0): + + %0(<4 x s32>) = IMPLICIT_DEF + %1(<8 x s32>) = G_MERGE_VALUES %0(<4 x s32>), %0(<4 x s32>) + %ymm0 = COPY %1(<8 x s32>) + RET 0, implicit %ymm0 + +... + Index: test/CodeGen/X86/GlobalISel/select-merge-vec512.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/GlobalISel/select-merge-vec512.mir @@ -0,0 +1,74 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL +--- | + define void @test_merge_v128() { + ret void + } + + define void @test_merge_v256() { + ret void + } + +... 
+--- +name: test_merge_v128 +# ALL-LABEL: name: test_merge_v128 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr128x } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 3, class: vr512 } +# ALL-NEXT: - { id: 4, class: vr512 } +# ALL-NEXT: - { id: 5, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } +# ALL: %0 = IMPLICIT_DEF +# ALL-NEXT: undef %2.sub_xmm = COPY %0 +# ALL-NEXT: %3 = VINSERTF32x4Zrr %2, %0, 1 +# ALL-NEXT: %4 = VINSERTF32x4Zrr %3, %0, 2 +# ALL-NEXT: %5 = VINSERTF32x4Zrr %4, %0, 3 +# ALL-NEXT: %1 = COPY %5 +# ALL-NEXT: %zmm0 = COPY %1 +# ALL-NEXT: RET 0, implicit %zmm0 +body: | + bb.1 (%ir-block.0): + + %0(<4 x s32>) = IMPLICIT_DEF + %1(<16 x s32>) = G_MERGE_VALUES %0(<4 x s32>), %0(<4 x s32>), %0(<4 x s32>), %0(<4 x s32>) + %zmm0 = COPY %1(<16 x s32>) + RET 0, implicit %zmm0 + +... +--- +name: test_merge_v256 +# ALL-LABEL: name: test_merge_v256 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr256x } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 3, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } +# ALL: %0 = IMPLICIT_DEF +# ALL-NEXT: undef %2.sub_ymm = COPY %0 +# ALL-NEXT: %3 = VINSERTF64x4Zrr %2, %0, 1 +# ALL-NEXT: %1 = COPY %3 +# ALL-NEXT: %zmm0 = COPY %1 +# ALL-NEXT: RET 0, implicit %zmm0 +body: | + bb.1 (%ir-block.0): + + %0(<8 x s32>) = IMPLICIT_DEF + %1(<16 x s32>) = G_MERGE_VALUES %0(<8 x s32>), %0(<8 x s32>) + %zmm0 = COPY %1(<16 x s32>) + RET 0, implicit %zmm0 + +... +