Index: llvm/trunk/lib/Target/X86/X86InstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstructionSelector.cpp
+++ llvm/trunk/lib/Target/X86/X86InstructionSelector.cpp
@@ -77,10 +77,15 @@
   bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
+  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
+                     MachineFunction &MF) const;
 
   // emit insert subreg instruction and insert it before MachineInstr &I
   bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;
+  // emit extract subreg instruction and insert it before MachineInstr &I
+  bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
+                         MachineRegisterInfo &MRI, MachineFunction &MF) const;
 
   const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
   const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
@@ -265,6 +270,8 @@
     return true;
   if (selectUadde(I, MRI, MF))
     return true;
+  if (selectExtract(I, MRI, MF))
+    return true;
   if (selectInsert(I, MRI, MF))
     return true;
 
@@ -711,6 +718,115 @@
   return true;
 }
 
+// Select a G_EXTRACT that pulls a subvector out of a wider vector.
+// A bit offset of zero becomes a subregister COPY (and I is erased); any other
+// offset rewrites I in place into a VEXTRACT* instruction, with the bit offset
+// converted into an index counted in destination-sized chunks.
+// Returns false if I is not a G_EXTRACT or the case is not handled here.
+bool X86InstructionSelector::selectExtract(MachineInstr &I,
+                                           MachineRegisterInfo &MRI,
+                                           MachineFunction &MF) const {
+
+  if (I.getOpcode() != TargetOpcode::G_EXTRACT)
+    return false;
+
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+  int64_t Index = I.getOperand(2).getImm();
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+
+  // For now, handle vector types only.
+  if (!DstTy.isVector())
+    return false;
+
+  if (Index % DstTy.getSizeInBits() != 0)
+    return false; // Not extract subvector.
+
+  if (Index == 0) {
+    // Replace by extract subreg copy.
+    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
+      return false;
+
+    I.eraseFromParent();
+    return true;
+  }
+
+  bool HasAVX = STI.hasAVX();
+  bool HasAVX512 = STI.hasAVX512();
+  bool HasVLX = STI.hasVLX();
+
+  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
+    if (HasVLX)
+      I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
+    else if (HasAVX)
+      I.setDesc(TII.get(X86::VEXTRACTF128rr));
+    else
+      return false;
+  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
+    if (DstTy.getSizeInBits() == 128)
+      I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
+    else if (DstTy.getSizeInBits() == 256)
+      I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
+    else
+      return false;
+  } else
+    return false;
+
+  // Convert to X86 VEXTRACT immediate.
+  Index = Index / DstTy.getSizeInBits();
+  I.getOperand(2).setImm(Index);
+
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+// Emit `DstReg = COPY SrcReg:SubIdx` before I, where SubIdx is sub_xmm for a
+// 128-bit DstReg and sub_ymm for a 256-bit one.
+// Returns false if either type is not a vector, DstReg has another width, or
+// a register cannot be constrained to the required class.
+bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
+                                               MachineInstr &I,
+                                               MachineRegisterInfo &MRI,
+                                               MachineFunction &MF) const {
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+  unsigned SubIdx = X86::NoSubRegister;
+
+  if (!DstTy.isVector() || !SrcTy.isVector())
+    return false;
+
+  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
+         "Incorrect Src/Dst register size");
+
+  if (DstTy.getSizeInBits() == 128)
+    SubIdx = X86::sub_xmm;
+  else if (DstTy.getSizeInBits() == 256)
+    SubIdx = X86::sub_ymm;
+  else
+    return false;
+
+  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
+  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
+
+  // getSubClassWithSubReg returns null when no sub-class supports SubIdx.
+  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);
+  if (!SrcRC)
+    return false;
+
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+    DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
+    return false;
+  }
+
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
+      .addReg(SrcReg, 0, SubIdx);
+
+  return true;
+}
+
 bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-extract-vec256.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-extract-vec256.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-extract-vec256.mir
@@ -0,0 +1,80 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
+
+--- |
+  define void @test_extract_128_idx0() {
+    ret void
+  }
+
+  define void @test_extract_128_idx1() {
+    ret void
+  }
+
+...
+---
+name: test_extract_128_idx0
+# ALL-LABEL: name: test_extract_128_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+# ALL: %0 = COPY %ymm1
+# ALL-NEXT: %1 = COPY %0.sub_xmm
+# ALL-NEXT: %xmm0 = COPY %1
+# ALL-NEXT: RET 0, implicit %xmm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm1
+
+    %0(<8 x s32>) = COPY %ymm1
+    %1(<4 x s32>) = G_EXTRACT %0(<8 x s32>), 0
+    %xmm0 = COPY %1(<4 x s32>)
+    RET 0, implicit %xmm0
+
+...
+---
+name: test_extract_128_idx1
+# ALL-LABEL: name: test_extract_128_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# AVX: registers:
+# AVX-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+#
+# AVX512VL: registers:
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+# AVX: %0 = COPY %ymm1
+# AVX-NEXT: %1 = VEXTRACTF128rr %0, 1
+# AVX-NEXT: %xmm0 = COPY %1
+# AVX-NEXT: RET 0, implicit %xmm0
+#
+# AVX512VL: %0 = COPY %ymm1
+# AVX512VL-NEXT: %1 = VEXTRACTF32x4Z256rr %0, 1
+# AVX512VL-NEXT: %xmm0 = COPY %1
+# AVX512VL-NEXT: RET 0, implicit %xmm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %ymm1
+
+    %0(<8 x s32>) = COPY %ymm1
+    %1(<4 x s32>) = G_EXTRACT %0(<8 x s32>), 128
+    %xmm0 = COPY %1(<4 x s32>)
+    RET 0, implicit %xmm0
+
+...
+
+
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-extract-vec512.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/select-extract-vec512.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-extract-vec512.mir
@@ -0,0 +1,129 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define void @test_extract_128_idx0() {
+    ret void
+  }
+
+  define void @test_extract_128_idx1() {
+    ret void
+  }
+
+  define void @test_extract_256_idx0() {
+    ret void
+  }
+
+  define void @test_extract_256_idx1() {
+    ret void
+  }
+
+...
+---
+# Bit offset 0 with a 128-bit result is expected to become a sub_xmm COPY.
+name: test_extract_128_idx0
+# ALL-LABEL: name: test_extract_128_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+# ALL: %0 = COPY %zmm1
+# ALL-NEXT: %1 = COPY %0.sub_xmm
+# ALL-NEXT: %xmm0 = COPY %1
+# ALL-NEXT: RET 0, implicit %xmm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm1
+
+    %0(<16 x s32>) = COPY %zmm1
+    %1(<4 x s32>) = G_EXTRACT %0(<16 x s32>), 0
+    %xmm0 = COPY %1(<4 x s32>)
+    RET 0, implicit %xmm0
+
+...
+---
+# Bit offset 128 with a 128-bit result is expected to become VEXTRACTF32x4Zrr 1.
+name: test_extract_128_idx1
+# ALL-LABEL: name: test_extract_128_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+# ALL: %0 = COPY %zmm1
+# ALL-NEXT: %1 = VEXTRACTF32x4Zrr %0, 1
+# ALL-NEXT: %xmm0 = COPY %1
+# ALL-NEXT: RET 0, implicit %xmm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm1
+
+    %0(<16 x s32>) = COPY %zmm1
+    %1(<4 x s32>) = G_EXTRACT %0(<16 x s32>), 128
+    %xmm0 = COPY %1(<4 x s32>)
+    RET 0, implicit %xmm0
+
+...
+---
+# Bit offset 0 with a 256-bit result is expected to become a sub_ymm COPY.
+name: test_extract_256_idx0
+# ALL-LABEL: name: test_extract_256_idx0
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+# ALL: %0 = COPY %zmm1
+# ALL-NEXT: %1 = COPY %0.sub_ymm
+# ALL-NEXT: %ymm0 = COPY %1
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm1
+
+    %0(<16 x s32>) = COPY %zmm1
+    %1(<8 x s32>) = G_EXTRACT %0(<16 x s32>), 0
+    %ymm0 = COPY %1(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+---
+# Bit offset 256 with a 256-bit result is expected to become VEXTRACTF64x4Zrr 1.
+name: test_extract_256_idx1
+# ALL-LABEL: name: test_extract_256_idx1
+alignment: 4
+legalized: true
+regBankSelected: true
+# ALL: registers:
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+registers:
+  - { id: 0, class: vecr }
+  - { id: 1, class: vecr }
+# ALL: %0 = COPY %zmm1
+# ALL-NEXT: %1 = VEXTRACTF64x4Zrr %0, 1
+# ALL-NEXT: %ymm0 = COPY %1
+# ALL-NEXT: RET 0, implicit %ymm0
+body: |
+  bb.1 (%ir-block.0):
+    liveins: %zmm1
+
+    %0(<16 x s32>) = COPY %zmm1
+    %1(<8 x s32>) = G_EXTRACT %0(<16 x s32>), 256
+    %ymm0 = COPY %1(<8 x s32>)
+    RET 0, implicit %ymm0
+
+...
+