// Patch against llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp.
//
// NOTE(review): the hunks below appear to have had their line breaks collapsed
// into spaces. The patch text is kept exactly as found; it will presumably need
// its original newlines and indentation restored before a patch tool accepts it.
//
// What the visible hunks do:
//   1. Declare a new const member function
//        emitLaneInsert(Opc, LaneIdx, EltSize, DstVec, InsReg, RB, MIRBuilder, MRI)
//      immediately before the selectBuildVector declaration.
//   2. Define AArch64InstructionSelector::emitLaneInsert. It creates a new
//      virtual register of class FPR128 for the result, then builds an `Opc`
//      instruction that defines that register and takes DstVec plus the
//      LaneIdx immediate. When RB is the FPR register bank, InsReg is first
//      passed through emitScalarToVector and the instruction receives operand 0
//      of that result followed by an immediate 0; otherwise InsReg is added as
//      a use directly. The built instruction is handed to
//      constrainSelectedInstRegOperands and returned.
//   3. In the selectBuildVector loop, replace the inline per-lane insertion
//      code with a call to emitLaneInsert (lane index i - 1), and take the next
//      DstVec from operand 0 of the returned instruction.
//
// NOTE(review): the removed loop body called constrainSelectedInstRegOperands
// on its IMPLICIT_DEF and INSERT_SUBREG as well as on the insert; the new
// helper only constrains the final insert, so it presumably relies on
// emitScalarToVector constraining whatever it emits -- confirm.
// NOTE(review): the value returned by emitScalarToVector is dereferenced
// without a null check; verify it cannot fail on this path.
// NOTE(review): `&*emitLaneInsert(...)` applies `&*` to what the declaration
// shows to be a MachineInstr *, which looks redundant.
// NOTE(review): the final hunk continues past the end of this fragment.
Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -71,6 +71,11 @@ const TargetRegisterClass *DstRC, unsigned Scalar, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitLaneInsert(unsigned Opc, unsigned LaneIdx, unsigned EltSize, + unsigned DstVec, unsigned InsReg, + const RegisterBank &RB, + MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI) const; bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; @@ -2289,6 +2294,29 @@ return true; } +MachineInstr *AArch64InstructionSelector::emitLaneInsert( + unsigned Opc, unsigned LaneIdx, unsigned EltSize, unsigned DstVec, + unsigned InsReg, const RegisterBank &RB, MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI) const { + MachineInstr *InsElt = nullptr; + const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; + unsigned InsDef = MRI.createVirtualRegister(DstRC); + if (RB.getID() == AArch64::FPRRegBankID) { + auto InsSub = emitScalarToVector(EltSize, DstRC, InsReg, MIRBuilder); + InsElt = MIRBuilder.buildInstr(Opc, {InsDef}, {DstVec}) + .addImm(LaneIdx) + .addUse(InsSub->getOperand(0).getReg()) + .addImm(0); + } else { + InsElt = MIRBuilder.buildInstr(Opc, {InsDef}, {DstVec}) + .addImm(LaneIdx) + .addUse(InsReg); + } + + constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); + return InsElt; +} + bool AArch64InstructionSelector::selectBuildVector( MachineInstr &I, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); @@ -2321,34 +2349,11 @@ // a copy using it. 
MachineInstr *PrevMI = nullptr; for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { - // Note that if we don't do a subregister copy, we end up making one more - // of these than we need. - unsigned InsDef = MRI.createVirtualRegister(DstRC); - unsigned LaneIdx = i - 1; - if (RB.getID() == AArch64::FPRRegBankID) { - auto ImpDef = - MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); - auto InsSub = MIRBuilder - .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, - {ImpDef, I.getOperand(i)}) - .addImm(SubregIdx); - auto InsElt = MIRBuilder.buildInstr(Opc, {InsDef}, {DstVec}) - .addImm(LaneIdx) - .addUse(InsSub.getReg(0)) - .addImm(0); - constrainSelectedInstRegOperands(*ImpDef, TII, TRI, RBI); - constrainSelectedInstRegOperands(*InsSub, TII, TRI, RBI); - constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); - DstVec = InsDef; - PrevMI = &*InsElt; - } else { - auto Ins = MIRBuilder.buildInstr(Opc, {InsDef}, {DstVec}) - .addImm(LaneIdx) - .addUse(I.getOperand(i).getReg()); - constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); - DstVec = InsDef; - PrevMI = &*Ins; - } + // Note that if we don't do a subregister copy, we can end up making an + // extra register. + PrevMI = &*emitLaneInsert(Opc, i - 1, EltSize, DstVec, + I.getOperand(i).getReg(), RB, MIRBuilder, MRI); + DstVec = PrevMI->getOperand(0).getReg(); } // If DstTy's size in bits is less than 128, then emit a subregister copy