Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -81,6 +81,14 @@
   bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
   void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
 
+  /// If \p MI is an extend that consumes the result of a
+  /// G_EXTRACT_VECTOR_ELT, try to combine it. Returns true if MI changed.
+  bool tryCombineExtendingVectorExtracts(MachineInstr &MI);
+  bool matchCombineExtendingVectorExtracts(MachineInstr &MI,
+                                           PreferredTuple &MatchInfo);
+  void applyCombineExtendingVectorExtracts(MachineInstr &MI,
+                                           PreferredTuple &MatchInfo);
+
   /// Combine \p MI into a pre-indexed or post-indexed load/store operation if
   /// legal and the surrounding code makes it useful.
   bool tryCombineIndexedLoadStore(MachineInstr &MI);
Index: llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -179,6 +179,11 @@
   /// - OpIdx - Operand index
   /// - SizeInBits - The size of the pointer value in bits.
   GIM_CheckPointerToAny,
+  /// Check that an operand is a scalar whose size matches a pointer size.
+  /// - InsnID - Instruction ID
+  /// - OpIdx - Operand index
+  /// - SizeInBits - The expected scalar size in bits (0 means use iPTR size)
+  GIM_CheckScalarOfPtrSize,
   /// Check the register bank for the specified operand
   /// - InsnID - Instruction ID
   /// - OpIdx - Operand index
@@ -273,6 +278,11 @@
   /// - OpIdx - The operand to copy
   /// - SubRegIdx - The subregister to copy
   GIR_CopySubReg,
+  /// Copy a temporary register to the specified instruction
+  /// - NewInsnID - Instruction ID to modify
+  /// - TempRegID - The temporary register ID to copy from
+  /// - SubRegIdx - The subregister to copy
+  GIR_CopyTmpSubReg,
 
   /// Add an implicit register def to the specified instruction
   /// - InsnID - Instruction ID to modify
Index: llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -548,7 +548,7 @@
       // iPTR must be looked up in the target.
       if (SizeInBits == 0) {
         MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent();
-        const unsigned AddrSpace = Ty.getAddressSpace();
+        const unsigned AddrSpace = Ty.isPointer() ? Ty.getAddressSpace() : 0;
         SizeInBits = MF->getDataLayout().getPointerSizeInBits(AddrSpace);
       }
@@ -563,6 +563,40 @@
       break;
     }
 
+    case GIM_CheckScalarOfPtrSize: {
+      int64_t InsnID = MatchTable[CurrentIdx++];
+      int64_t OpIdx = MatchTable[CurrentIdx++];
+      int64_t SizeInBits = MatchTable[CurrentIdx++];
+
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << CurrentIdx << ": GIM_CheckScalarOfPtrSize(MIs["
+                             << InsnID << "]->getOperand(" << OpIdx
+                             << ")");
+
+      assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+      MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+      const LLT Ty = MRI.getType(MO.getReg());
+
+      // iPTR must be looked up in the target.
+      if (SizeInBits == 0) {
+        MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent();
+        const unsigned AddrSpace = Ty.isPointer() ? 
Ty.getAddressSpace() : 0;
+        SizeInBits = MF->getDataLayout().getPointerSizeInBits(AddrSpace);
+      }
+
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << ", SizeInBits=" << SizeInBits << ")\n");
+      assert(SizeInBits != 0 && "Pointer size must be known");
+
+      if (MO.isReg()) {
+        if (!Ty.isScalar() || Ty.getSizeInBits() != SizeInBits)
+          if (handleReject() == RejectAndGiveUp)
+            return false;
+      } else if (handleReject() == RejectAndGiveUp)
+        return false;
+
+      break;
+    }
     case GIM_CheckRegBankForClass: {
       int64_t InsnID = MatchTable[CurrentIdx++];
       int64_t OpIdx = MatchTable[CurrentIdx++];
@@ -817,7 +851,20 @@
                  << OpIdx << ", " << SubRegIdx << ")\n");
       break;
     }
-
+    case GIR_CopyTmpSubReg: {
+      int64_t InsnID = MatchTable[CurrentIdx++];
+      int64_t TempRegID = MatchTable[CurrentIdx++];
+      uint64_t SubRegIdx = MatchTable[CurrentIdx++];
+      assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
+      // Append the temporary register (with the requested subregister index)
+      // as an operand of the instruction being built.
+      OutMIs[InsnID].addReg(State.TempRegisters[TempRegID], 0, SubRegIdx);
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << CurrentIdx << ": GIR_CopyTmpSubReg(OutMIs["
+                             << InsnID << "], TempRegisters[" << TempRegID
+                             << "], " << SubRegIdx << ")\n");
+      break;
+    }
     case GIR_AddImplicitDef: {
       int64_t InsnID = MatchTable[CurrentIdx++];
       int64_t RegNum = MatchTable[CurrentIdx++];
Index: llvm/include/llvm/Support/TargetOpcodes.def
===================================================================
--- llvm/include/llvm/Support/TargetOpcodes.def
+++ llvm/include/llvm/Support/TargetOpcodes.def
@@ -554,6 +554,12 @@
 /// Generic extractelement.
 HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT)
 
+/// Generic extractelement with signextend.
+HANDLE_TARGET_OPCODE(G_SEXT_EXTRACT_VECTOR_ELT)
+
+/// Generic extractelement with zeroextend.
+HANDLE_TARGET_OPCODE(G_ZEXT_EXTRACT_VECTOR_ELT)
+
 /// Generic shufflevector.
HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -1024,6 +1024,20 @@ let hasSideEffects = 0; } +// Generic extractelement with signextend. +def G_SEXT_EXTRACT_VECTOR_ELT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src, type2:$idx); + let hasSideEffects = 0; +} + +// Generic extractelement with zeroextend. +def G_ZEXT_EXTRACT_VECTOR_ELT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src, type2:$idx); + let hasSideEffects = 0; +} + // Generic shufflevector. // // The mask operand should be an IR Constant which exactly matches the Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -117,6 +117,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some // complications that tablegen must take care of. 
For example, Predicates such
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -537,6 +537,149 @@
   Observer.changedInstr(MI);
 }
 
+bool CombinerHelper::tryCombineExtendingVectorExtracts(MachineInstr &MI) {
+  PreferredTuple Preferred;
+  if (matchCombineExtendingVectorExtracts(MI, Preferred)) {
+    applyCombineExtendingVectorExtracts(MI, Preferred);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::matchCombineExtendingVectorExtracts(
+    MachineInstr &MI, PreferredTuple &Preferred) {
+  // We match the extract and follow its uses to the extend instead of
+  // matching the extends and following the def back to the extract. The
+  // extract stays in place and only the extends around it are rewritten,
+  // mirroring the extending-loads combine above. NOTE(review): this comment
+  // originally referred to loads; the sinking/volatile rationale was
+  // inherited from that combine and may not all apply here — confirm.
+
+  if (MI.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT &&
+      MI.getOpcode() != TargetOpcode::G_SEXT_EXTRACT_VECTOR_ELT &&
+      MI.getOpcode() != TargetOpcode::G_ZEXT_EXTRACT_VECTOR_ELT)
+    return false;
+
+  auto &ExtractValue = MI.getOperand(0);
+  assert(ExtractValue.isReg() && "Result wasn't a register?");
+
+  LLT ExtractValueTy = MRI.getType(ExtractValue.getReg());
+  if (!ExtractValueTy.isScalar())
+    return false;
+
+  // Find the preferred type aside from the any-extends (unless it's the only
+  // one) and non-extending ops. We'll emit an extending Extract to that type
+  // and emit a variant of (extend (trunc X)) for the others according to
+  // the relative type sizes. At the same time, pick an extend to use based on
+  // the extend involved in the chosen type.
+  unsigned PreferredOpcode =
+      MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
+          ? 
TargetOpcode::G_ANYEXT + : MI.getOpcode() == TargetOpcode::G_SEXT_EXTRACT_VECTOR_ELT + ? TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; + Preferred = {LLT(), PreferredOpcode, nullptr}; + for (auto &UseMI : MRI.use_instructions(ExtractValue.getReg())) { + if (UseMI.getOpcode() == TargetOpcode::G_SEXT || + UseMI.getOpcode() == TargetOpcode::G_ZEXT || + UseMI.getOpcode() == TargetOpcode::G_ANYEXT) { + Preferred = ChoosePreferredUse(Preferred, + MRI.getType(UseMI.getOperand(0).getReg()), + UseMI.getOpcode(), &UseMI); + } + } + + // There were no extends + if (!Preferred.MI) + return false; + // It should be impossible to chose an extend without selecting a different + // type since by definition the result of an extend is larger. + assert(Preferred.Ty != ExtractValueTy && "Extending to same type?"); + + LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); + return true; +} + +void CombinerHelper::applyCombineExtendingVectorExtracts( + MachineInstr &MI, PreferredTuple &Preferred) { + // Rewrite the extract to the chosen extending extract. + Register ChosenDstReg = Preferred.MI->getOperand(0).getReg(); + + // Inserter to insert a truncate back to the original type at a given point + // with some basic CSE to limit truncate duplication to one per BB. 
+ DenseMap EmittedInsns; + auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB, + MachineBasicBlock::iterator InsertBefore, + MachineOperand &UseMO) { + MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB); + if (PreviouslyEmitted) { + Observer.changingInstr(*UseMO.getParent()); + UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg()); + Observer.changedInstr(*UseMO.getParent()); + return; + } + + Builder.setInsertPt(*InsertIntoBB, InsertBefore); + Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); + MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg); + EmittedInsns[InsertIntoBB] = NewMI; + replaceRegOpWith(MRI, UseMO, NewDstReg); + }; + + Observer.changingInstr(MI); + MI.setDesc( + Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT + ? TargetOpcode::G_SEXT_EXTRACT_VECTOR_ELT + : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT + ? TargetOpcode::G_ZEXT_EXTRACT_VECTOR_ELT + : TargetOpcode::G_EXTRACT_VECTOR_ELT)); + + // Rewrite all the uses to fix up the types. + auto &ExtractValue = MI.getOperand(0); + SmallVector Uses; + for (auto &UseMO : MRI.use_operands(ExtractValue.getReg())) + Uses.push_back(&UseMO); + + for (auto *UseMO : Uses) { + MachineInstr *UseMI = UseMO->getParent(); + + // If the extend is compatible with the preferred extend then we should fix + // up the type and extend so that it uses the preferred use. 
+ if (UseMI->getOpcode() == Preferred.ExtendOpcode || + UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { + Register UseDstReg = UseMI->getOperand(0).getReg(); + MachineOperand &UseSrcMO = UseMI->getOperand(1); + const LLT &UseDstTy = MRI.getType(UseDstReg); + if (UseDstReg != ChosenDstReg) { + if (Preferred.Ty == UseDstTy) { + replaceRegWith(MRI, UseDstReg, ChosenDstReg); + Observer.erasingInstr(*UseMO->getParent()); + UseMO->getParent()->eraseFromParent(); + } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) { + replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg); + } else { + InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, + InsertTruncAt); + } + continue; + } + // The use is (one of) the uses of the preferred use we chose earlier. + // We're going to update the load to def this value later so just erase + // the old extend. + Observer.erasingInstr(*UseMO->getParent()); + UseMO->getParent()->eraseFromParent(); + continue; + } + + // The use isn't an extend. Truncate back to the type we originally loaded. + // This is free on many targets. 
+ InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt); + } + + MI.getOperand(0).setReg(ChosenDstReg); + Observer.changedInstr(MI); +} + bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { assert(DefMI.getParent() == UseMI.getParent()); if (&DefMI == &UseMI) Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1773,19 +1773,19 @@ } case TargetOpcode::G_EXTRACT_VECTOR_ELT: { if (TypeIdx == 0) { - Register VecReg = MI.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); Observer.changingInstr(MI); - - widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), - WideTy.getSizeInBits()), - 1, TargetOpcode::G_SEXT); - widenScalarDst(MI, WideTy, 0); Observer.changedInstr(MI); return Legalized; } + if (TypeIdx == 1) { + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + return Legalized; + } + if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); @@ -1795,16 +1795,17 @@ return Legalized; } case TargetOpcode::G_INSERT_VECTOR_ELT: { - if (TypeIdx == 1) { + if (TypeIdx == 0) { Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } - Register VecReg = MI.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); - LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy); - - widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT); + if (TypeIdx == 1) { + Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); - widenScalarDst(MI, WideVecTy, 0); Observer.changedInstr(MI); return Legalized; } Index: llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp =================================================================== --- 
llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -1087,9 +1087,6 @@ DstOps[0].getLLTTy(*getMRI()).isPointer()) && "Invalid operand type"); assert(SrcOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand type"); - assert(SrcOps[0].getLLTTy(*getMRI()).getElementType() == - DstOps[0].getLLTTy(*getMRI()) && - "Type mismatch"); break; } case TargetOpcode::G_INSERT_VECTOR_ELT: { @@ -1097,9 +1094,6 @@ assert(SrcOps.size() == 3 && "Invalid src size"); assert(DstOps[0].getLLTTy(*getMRI()).isVector() && SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type"); - assert(DstOps[0].getLLTTy(*getMRI()).getElementType() == - SrcOps[1].getLLTTy(*getMRI()) && - "Type mismatch"); assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index"); assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() == SrcOps[0].getLLTTy(*getMRI()).getNumElements() && Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -920,14 +920,15 @@ const LLT EltTy = Query.Types[EltTypeIdx]; const LLT VecTy = Query.Types[VecTypeIdx]; const LLT IdxTy = Query.Types[IdxTypeIdx]; - return (EltTy.getSizeInBits() == 16 || + return EltTy.getSizeInBits() == VecTy.getScalarSizeInBits() && + (EltTy.getSizeInBits() == 16 || EltTy.getSizeInBits() % 32 == 0) && VecTy.getSizeInBits() % 32 == 0 && VecTy.getSizeInBits() <= 1024 && IdxTy.getSizeInBits() == 32; }) .clampScalar(EltTypeIdx, S32, S64) - .clampScalar(VecTypeIdx, S32, S64) + .clampScalarOrElt(VecTypeIdx, S32, S64) .clampScalar(IdxTypeIdx, S32, S32); } Index: llvm/lib/Target/Mips/MipsLegalizerInfo.cpp =================================================================== --- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -99,6 +99,35 @@ getActionDefinitionsBuilder(G_IMPLICIT_DEF) 
.legalFor({s32, s64}); + getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) + .legalIf([=, &ST](const LegalityQuery &Query) { + if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}) && + CheckTyN(1, Query, {s32, s64}) && CheckTyN(2, Query, {s32})) + return true; + return false; + }) + .minScalar(1, s32); + + getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) + .legalIf([=, &ST](const LegalityQuery &Query) { + if (ST.hasMSA() && CheckTyN(0, Query, {s32, s64}) && + CheckTyN(1, Query, {v16s8, v8s16, v4s32, v2s64}) && + CheckTyN(2, Query, {s32})) + return true; + return false; + }) + .minScalar(0, s32); + + getActionDefinitionsBuilder( + {G_SEXT_EXTRACT_VECTOR_ELT, G_ZEXT_EXTRACT_VECTOR_ELT}) + .legalIf([=, &ST](const LegalityQuery &Query) { + if (ST.hasMSA() && CheckTyN(0, Query, {s32, s64}) && + CheckTyN(1, Query, {v16s8, v8s16, v4s32, v2s64}) && + CheckTyN(2, Query, {s32})) + return true; + return false; + }); + getActionDefinitionsBuilder(G_UNMERGE_VALUES) .legalFor({{s32, s64}}); Index: llvm/lib/Target/Mips/MipsMSAInstrInfo.td =================================================================== --- llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -51,9 +51,12 @@ def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", - SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; + SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", - SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; + SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; + +def : GINodeEquiv; +def : GINodeEquiv; def immZExt1Ptr : ImmLeaf(Imm);}]>; def immZExt2Ptr : ImmLeaf(Imm);}]>; @@ -71,22 +74,22 @@ // Pattern fragments def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractSExt node:$vec, node:$idx, i8)>; + (MipsVExtractSExt node:$vec, node:$idx)>; def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractSExt node:$vec, node:$idx, i16)>; 
+ (MipsVExtractSExt node:$vec, node:$idx)>; def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractSExt node:$vec, node:$idx, i32)>; + (MipsVExtractSExt node:$vec, node:$idx)>; def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractSExt node:$vec, node:$idx, i64)>; + (MipsVExtractSExt node:$vec, node:$idx)>; def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractZExt node:$vec, node:$idx, i8)>; + (MipsVExtractZExt node:$vec, node:$idx)>; def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractZExt node:$vec, node:$idx, i16)>; + (MipsVExtractZExt node:$vec, node:$idx)>; def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractZExt node:$vec, node:$idx, i32)>; + (MipsVExtractZExt node:$vec, node:$idx)>; def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), - (MipsVExtractZExt node:$vec, node:$idx, i64)>; + (MipsVExtractZExt node:$vec, node:$idx)>; def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>; Index: llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp =================================================================== --- llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp +++ llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp @@ -46,6 +46,10 @@ case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: return Helper.tryCombineExtendingLoads(MI); + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_SEXT_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_ZEXT_EXTRACT_VECTOR_ELT: + return Helper.tryCombineExtendingVectorExtracts(MI); } return false; } Index: llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -99,6 +99,8 @@ case Mips::MSA128WRegClassID: case Mips::MSA128DRegClassID: return getRegBank(Mips::FPRBRegBankID); + 
case Mips::GPR64RegClassID: + return getRegBank(Mips::InvalidBRegBankID); default: llvm_unreachable("Register class not supported"); } @@ -162,6 +164,8 @@ case TargetOpcode::G_PHI: case TargetOpcode::G_SELECT: case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_INSERT_VECTOR_ELT: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: return true; default: return false; @@ -226,9 +230,13 @@ if (MI->getOpcode() == TargetOpcode::G_LOAD) addDefUses(MI->getOperand(0).getReg(), MRI); + if (MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) + addDefUses(MI->getOperand(0).getReg(), MRI); if (MI->getOpcode() == TargetOpcode::G_STORE) addUseDef(MI->getOperand(0).getReg(), MRI); + if (MI->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) + addUseDef(MI->getOperand(2).getReg(), MRI); if (MI->getOpcode() == TargetOpcode::G_PHI) { addDefUses(MI->getOperand(0).getReg(), MRI); @@ -380,6 +388,7 @@ } static const unsigned CustomMappingID = 1; +static const unsigned CustomMappingOpcodeID = 2; // Only 64 bit mapping is available in fprb and will be marked as custom, i.e. // will be split into two 32 bit registers in gprb. @@ -527,6 +536,57 @@ OperandsMapping = getGprbOrCustomMapping(Op0Size, MappingID); break; + case G_INSERT_VECTOR_ELT: { + const LLT Op2Ty = MRI.getType(MI.getOperand(2).getReg()); + unsigned Op2Size = Op2Ty.getSizeInBits(); + + if (!Op2Ty.isPointer()) + InstTy = TI.determineInstType(&MI); + + const RegisterBankInfo::ValueMapping *MSABank = getMSAMapping(MF); + if (InstTy == InstType::FloatingPoint || + (Op2Size == 64 && InstTy == InstType::Ambiguous)) + OperandsMapping = + getOperandsMapping({MSABank, MSABank, getFprbMapping(Op2Size), + &Mips::ValueMappings[Mips::GPRIdx]}); + else + // Ambiguous 32 bit vector insert will be mapped to gprb to match the + // way other ambiguous 32 bit operands are mapped. 
+ OperandsMapping = getOperandsMapping( + {MSABank, MSABank, getGprbOrCustomMapping(Op2Size, MappingID), + &Mips::ValueMappings[Mips::GPRIdx]}); + + break; + } + case G_EXTRACT_VECTOR_ELT: { + if (!Op0Ty.isPointer()) + InstTy = TI.determineInstType(&MI); + + const RegisterBankInfo::ValueMapping *MSABank = getMSAMapping(MF); + if (InstTy == InstType::FloatingPoint || + (Op0Size == 64 && InstTy == InstType::Ambiguous)) + OperandsMapping = + getOperandsMapping({getFprbMapping(Op0Size), MSABank, + &Mips::ValueMappings[Mips::GPRIdx]}); + else { + // Ambiguous 32 bit vector extract will be mapped to gprb to match the + // way other ambiguous 32 bit operands are mapped. + OperandsMapping = + getOperandsMapping({getGprbOrCustomMapping(Op0Size, MappingID), + MSABank, &Mips::ValueMappings[Mips::GPRIdx]}); + if (Op0Size == 32 && getGprbOrCustomMapping(Op0Size, MappingID) == + &Mips::ValueMappings[Mips::GPRIdx]) + MappingID = CustomMappingOpcodeID; + } + break; + } + case G_SEXT_EXTRACT_VECTOR_ELT: + case G_ZEXT_EXTRACT_VECTOR_ELT: { + OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], + getMSAMapping(MF), + &Mips::ValueMappings[Mips::GPRIdx]}); + break; + } case G_UNMERGE_VALUES: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], &Mips::ValueMappings[Mips::GPRIdx], @@ -671,6 +731,20 @@ LegalizerHelper Helper(*MF, WrapperObserver, B); LegalizationArtifactCombiner ArtCombiner(B, MF->getRegInfo(), LegInfo); + if (OpdMapper.getInstrMapping().getID() == CustomMappingOpcodeID) + switch (MI.getOpcode()) { + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + MRI.setRegBank(MI.getOperand(0).getReg(), + getRegBank(Mips::GPRBRegBankID)); + MI.setDesc(MI.getMF()->getSubtarget().getInstrInfo()->get( + TargetOpcode::G_SEXT_EXTRACT_VECTOR_ELT)); + return; + } + default: + break; + } + + if(OpdMapper.getInstrMapping().getID() == CustomMappingID) switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: Index: 
llvm/lib/Target/Mips/MipsRegisterBanks.td =================================================================== --- llvm/lib/Target/Mips/MipsRegisterBanks.td +++ llvm/lib/Target/Mips/MipsRegisterBanks.td @@ -12,3 +12,5 @@ def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>; def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64, MSA128D]>; + +def InvalidBRegBank : RegisterBank<"InvalidB", [GPR64]>; Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -440,6 +440,12 @@ # DEBUG-NEXT: G_EXTRACT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_SEXT_EXTRACT_VECTOR_ELT (opcode 155): 3 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ZEXT_EXTRACT_VECTOR_ELT (opcode 156): 3 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -150,9 +150,9 @@ ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY1]], [[COPY2]](s32), 0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY [[INSERT]](<2 x s32>) ; CHECK: $vgpr0_vgpr1 = COPY [[COPY3]](<2 x s32>) %0:_(s32) = COPY $vgpr0 Index: llvm/test/CodeGen/Mips/GlobalISel/instruction-select/extract_vector_elt_imm_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/instruction-select/extract_vector_elt_imm_index.mir @@ -0,0 +1,214 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @extract_i8_sext(<16 x i8>* %V, i32 %index) { entry: ret void } + define void @extract_i16_sext(<8 x i16>* %V, i32 %index) { entry: ret void } + define void @extract_i32_sext(<4 x i32>* %V, i32 %index) { entry: ret void } + define void @extract_i8_zext(<16 x i8>* %V, i32 %index) { entry: ret void } + define void @extract_i16_zext(<8 x i16>* %V, i32 
%index) { entry: ret void } + define void @extract_i32_zext(<4 x i32>* %V, i32 %index) { entry: ret void } + define void @extract_float_fprb(<4 x float>* %V, i32 %index) { entry: ret void } + define void @extract_double(<2 x double>* %V, i32 %index) { entry: ret void } + +... +--- +name: extract_i8_sext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i8_sext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_S_B:%[0-9]+]]:gpr32 = COPY_S_B [[LD_B]], 7 + ; P5600: $v0 = COPY [[COPY_S_B]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 7 + %1:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT %1(<16 x s8>), %3(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... +--- +name: extract_i16_sext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i16_sext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_S_H:%[0-9]+]]:gpr32 = COPY_S_H [[LD_H]], 5 + ; P5600: $v0 = COPY [[COPY_S_H]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 5 + %1:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT %1(<8 x s16>), %3(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i32_sext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i32_sext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_S_W:%[0-9]+]]:gpr32 = COPY_S_W [[LD_W]], 2 + ; P5600: $v0 = COPY [[COPY_S_W]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 2 + %1:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT %1(<4 x s32>), %3(s32) + $v0 = COPY %2(s32) + RetRA implicit $v0 + +... +--- +name: extract_i8_zext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i8_zext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_U_B:%[0-9]+]]:gpr32 = COPY_U_B [[LD_B]], 7 + ; P5600: $v0 = COPY [[COPY_U_B]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 7 + %1:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %1(<16 x s8>), %3(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i16_zext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i16_zext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_U_H:%[0-9]+]]:gpr32 = COPY_U_H [[LD_H]], 5 + ; P5600: $v0 = COPY [[COPY_U_H]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 5 + %1:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %1(<8 x s16>), %3(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... +--- +name: extract_i32_zext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i32_zext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_S_W:%[0-9]+]]:gpr32 = COPY_S_W [[LD_W]], 2 + ; P5600: $v0 = COPY [[COPY_S_W]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 2 + %1:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:gprb(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %1(<4 x s32>), %3(s32) + $v0 = COPY %2(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_float_fprb +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_float_fprb + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_FW:%[0-9]+]]:fgr32 = COPY_FW_PSEUDO [[LD_W]], 3 + ; P5600: $f0 = COPY [[COPY_FW]] + ; P5600: RetRA implicit $f0 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 3 + %1:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:fprb(s32) = G_EXTRACT_VECTOR_ELT %1(<4 x s32>), %3(s32) + $f0 = COPY %2(s32) + RetRA implicit $f0 + +... +--- +name: extract_double +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_double + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[COPY_FD:%[0-9]+]]:fgr64 = COPY_FD_PSEUDO [[LD_D]], 1 + ; P5600: $d0_64 = COPY [[COPY_FD]] + ; P5600: RetRA implicit $d0_64 + %0:gprb(p0) = COPY $a0 + %3:gprb(s32) = G_CONSTANT i32 1 + %1:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:fprb(s64) = G_EXTRACT_VECTOR_ELT %1(<2 x s64>), %3(s32) + $d0_64 = COPY %2(s64) + RetRA implicit $d0_64 + +... 
Index: llvm/test/CodeGen/Mips/GlobalISel/instruction-select/extract_vector_elt_variable_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/instruction-select/extract_vector_elt_variable_index.mir @@ -0,0 +1,240 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @extract_i8_sext(<16 x i8>* %V, i32 %index) { entry: ret void } + define void @extract_i16_sext(<8 x i16>* %V, i32 %index) { entry: ret void } + define void @extract_i32_sext(<4 x i32>* %V, i32 %index) { entry: ret void } + define void @extract_i8_zext(<16 x i8>* %V, i32 %index) { entry: ret void } + define void @extract_i16_zext(<8 x i16>* %V, i32 %index) { entry: ret void } + define void @extract_i32_zext(<4 x i32>* %V, i32 %index) { entry: ret void } + define void @extract_float_fprb(<4 x float>* %V, i32 %index) { entry: ret void } + define void @extract_double(<2 x double>* %V, i32 %index) { entry: ret void } + +... 
+--- +name: extract_i8_sext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i8_sext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_B:%[0-9]+]]:msa128b = SPLAT_B [[LD_B]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr32 = COPY [[SPLAT_B]].sub_lo + ; P5600: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]] + ; P5600: [[SRA:%[0-9]+]]:gpr32 = SRA [[COPY3]], 24 + ; P5600: $v0 = COPY [[SRA]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT %2(<16 x s8>), %1(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... +--- +name: extract_i16_sext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i16_sext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_H:%[0-9]+]]:msa128h = SPLAT_H [[LD_H]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr32 = COPY [[SPLAT_H]].sub_lo + ; P5600: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]] + ; P5600: [[SRA:%[0-9]+]]:gpr32 = SRA [[COPY3]], 16 + ; P5600: $v0 = COPY [[SRA]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT %2(<8 x s16>), %1(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i32_sext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i32_sext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_W:%[0-9]+]]:msa128w = SPLAT_W [[LD_W]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr32 = COPY [[SPLAT_W]].sub_lo + ; P5600: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]] + ; P5600: $v0 = COPY [[COPY3]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %3:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT %2(<4 x s32>), %1(s32) + $v0 = COPY %3(s32) + RetRA implicit $v0 + +... +--- +name: extract_i8_zext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i8_zext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_B:%[0-9]+]]:msa128b = SPLAT_B [[LD_B]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr32 = COPY [[SPLAT_B]].sub_lo + ; P5600: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]] + ; P5600: [[SRL:%[0-9]+]]:gpr32 = SRL [[COPY3]], 24 + ; P5600: $v0 = COPY [[SRL]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %2(<16 x s8>), %1(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i16_zext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i16_zext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_H:%[0-9]+]]:msa128h = SPLAT_H [[LD_H]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr32 = COPY [[SPLAT_H]].sub_lo + ; P5600: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]] + ; P5600: [[SRL:%[0-9]+]]:gpr32 = SRL [[COPY3]], 16 + ; P5600: $v0 = COPY [[SRL]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:gprb(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %2(<8 x s16>), %1(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... +--- +name: extract_i32_zext +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i32_zext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_W:%[0-9]+]]:msa128w = SPLAT_W [[LD_W]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr32 = COPY [[SPLAT_W]].sub_lo + ; P5600: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]] + ; P5600: $v0 = COPY [[COPY3]] + ; P5600: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %3:gprb(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %2(<4 x s32>), %1(s32) + $v0 = COPY %3(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_float_fprb +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_float_fprb + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_W:%[0-9]+]]:msa128w = SPLAT_W [[LD_W]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr32 = COPY [[SPLAT_W]].sub_lo + ; P5600: $f0 = COPY [[COPY2]] + ; P5600: RetRA implicit $f0 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %3:fprb(s32) = G_EXTRACT_VECTOR_ELT %2(<4 x s32>), %1(s32) + $f0 = COPY %3(s32) + RetRA implicit $f0 + +... +--- +name: extract_double +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_double + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.V) + ; P5600: [[SPLAT_D:%[0-9]+]]:msa128d = SPLAT_D [[LD_D]], [[COPY1]] + ; P5600: [[COPY2:%[0-9]+]]:fgr64 = COPY [[SPLAT_D]].sub_64 + ; P5600: $d0_64 = COPY [[COPY2]] + ; P5600: RetRA implicit $d0_64 + %0:gprb(p0) = COPY $a0 + %1:gprb(s32) = COPY $a1 + %2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %3:fprb(s64) = G_EXTRACT_VECTOR_ELT %2(<2 x s64>), %1(s32) + $d0_64 = COPY %3(s64) + RetRA implicit $d0_64 + +... 
Index: llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_imm_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_imm_index.mir @@ -0,0 +1,148 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @insert_i8(i8 %val, <16 x i8>* %V) { entry: ret void } + define void @insert_i16(i16 %val, <8 x i16>* %V) { entry: ret void } + define void @insert_i32(i32 %val, <4 x i32>* %V) { entry: ret void } + define void @insert_float_fprb(float %val, <4 x float>* %V) { entry: ret void } + define void @insert_double(double %val, <2 x double>* %V) { entry: ret void } + +... +--- +name: insert_i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_B:%[0-9]+]]:msa128b = INSERT_B [[LD_B]], [[COPY]], 7 + ; P5600: ST_B [[INSERT_B]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %2:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %5:gprb(s32) = G_CONSTANT i32 7 + %3:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %2(s32) + %4:fprb(<16 x s8>) = G_INSERT_VECTOR_ELT %3, %6(s32), %5(s32) + G_STORE %4(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_H:%[0-9]+]]:msa128h = INSERT_H [[LD_H]], [[COPY]], 5 + ; P5600: ST_H [[INSERT_H]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %2:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %5:gprb(s32) = G_CONSTANT i32 5 + %3:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %2(s32) + %4:fprb(<8 x s16>) = G_INSERT_VECTOR_ELT %3, %6(s32), %5(s32) + G_STORE %4(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_W:%[0-9]+]]:msa128w = INSERT_W [[LD_W]], [[COPY]], 2 + ; P5600: ST_W [[INSERT_W]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %4:gprb(s32) = G_CONSTANT i32 2 + %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_float_fprb +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $f12 + ; P5600: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128w = INSERT_FW_PSEUDO [[LD_W]], 3, [[COPY]] + ; P5600: ST_W [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s32) = COPY $f12 + %1:gprb(p0) = COPY $a1 + %4:gprb(s32) = G_CONSTANT i32 3 + %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_double +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $d12_64 + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:fgr64 = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128d = INSERT_FD_PSEUDO [[LD_D]], 1, [[COPY]] + ; P5600: ST_D [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s64) = COPY $d12_64 + %1:gprb(p0) = COPY $a2 + %4:gprb(s32) = G_CONSTANT i32 1 + %2:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %4(s32) + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
Index: llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_variable_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_variable_index.mir @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @insert_i8(i8 %val, <16 x i8>* %V, i32 %index) { entry: ret void } + define void @insert_i16(i16 %val, <8 x i16>* %V, i32 %index) { entry: ret void } + define void @insert_i32(i32 %val, <4 x i32>* %V, i32 %index) { entry: ret void } + define void @insert_float_fprb(float %val, <4 x float>* %V, i32 %index) { entry: ret void } + define void @insert_double(double %val, <2 x double>* %V, i32 %index) { entry: ret void } + +... +--- +name: insert_i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128b = INSERT_B_VIDX_PSEUDO [[LD_B]], [[COPY2]], [[COPY]] + ; P5600: ST_B [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %3:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %3(s32) + %5:fprb(<16 x s8>) = G_INSERT_VECTOR_ELT %4, %6(s32), %2(s32) + G_STORE %5(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128h = INSERT_H_VIDX_PSEUDO [[LD_H]], [[COPY2]], [[COPY]] + ; P5600: ST_H [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %3:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %3(s32) + %5:fprb(<8 x s16>) = G_INSERT_VECTOR_ELT %4, %6(s32), %2(s32) + G_STORE %5(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128w = INSERT_W_VIDX_PSEUDO [[LD_W]], [[COPY2]], [[COPY]] + ; P5600: ST_W [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %3:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_float_fprb +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $a2, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $a2, $f12 + ; P5600: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128w = INSERT_FW_VIDX_PSEUDO [[LD_W]], [[COPY2]], [[COPY]] + ; P5600: ST_W [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s32) = COPY $f12 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %3:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_double +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $a3, $d12_64 + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $a3, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:fgr64 = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a3 + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128d = INSERT_FD_VIDX_PSEUDO [[LD_D]], [[COPY2]], [[COPY]] + ; P5600: ST_D [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s64) = COPY $d12_64 + %1:gprb(p0) = COPY $a2 + %2:gprb(s32) = COPY $a3 + %3:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT %3, %0(s64), %2(s32) + G_STORE %4(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/extract_vector_elt_pre_isel.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/extract_vector_elt_pre_isel.mir @@ -0,0 +1,134 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @extract_i8_sext(<16 x i8>* %V) { entry: ret void } + define void @extract_i16_zext(<8 x i16>* %V) { entry: ret void } + define void @extract_i8_anyext(<16 x i8>* %V) { entry: ret void } + define void @extract_i32(<4 x i32>* %V) { entry: ret void } + define void @extract_i8_combine_immune(<16 x i8>* %V, i32 %idx, i8* %pc) { entry: ret void } + +... +--- +name: extract_i8_sext +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i8_sext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_SEXT_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[C]](s32) + ; P5600: $v0 = COPY [[SEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %4:_(s32) = G_CONSTANT i32 7 + %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %5:_(s32) = G_SEXT_EXTRACT_VECTOR_ELT %2(<16 x s8>), %4(s32) + $v0 = COPY %5(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i16_zext +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i16_zext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_ZEXT_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s16>), [[C]](s32) + ; P5600: $v0 = COPY [[ZEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %4:_(s32) = G_CONSTANT i32 7 + %2:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %5:_(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %2(<8 x s16>), %4(s32) + $v0 = COPY %5(s32) + RetRA implicit $v0 + +... +--- +name: extract_i8_anyext +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i8_anyext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[C]](s32) + ; P5600: $v0 = COPY [[EVEC]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %3:_(s32) = G_CONSTANT i32 7 + %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:_(s32) = G_EXTRACT_VECTOR_ELT %1(<16 x s8>), %3(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i32 + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s32>), [[C]](s32) + ; P5600: $v0 = COPY [[EVEC]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %3:_(s32) = G_CONSTANT i32 2 + %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:_(s32) = G_EXTRACT_VECTOR_ELT %1(<4 x s32>), %3(s32) + $v0 = COPY %2(s32) + RetRA implicit $v0 + +... +--- +name: extract_i8_combine_immune +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: extract_i8_combine_immune + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[COPY1]](s32) + ; P5600: [[COPY3:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32) + ; P5600: G_STORE [[COPY3]](s32), [[COPY2]](p0) :: (store 1 into %ir.pc) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(s32) = COPY $a1 + %2:_(p0) = COPY $a2 + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:_(s8) = G_EXTRACT_VECTOR_ELT %3(<16 x s8>), %1(s32) + G_STORE %4(s8), %2(p0) :: (store 1 into %ir.pc) + RetRA + +... 
Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/extract_vector_elt_pre_isel.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/extract_vector_elt_pre_isel.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600 + +define i32 @extract_i8_sext(<16 x i8>* %V) { +; P5600-LABEL: extract_i8_sext: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.b $w0, 0($4) +; P5600-NEXT: copy_s.b $2, $w0[7] +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <16 x i8>, <16 x i8>* %V, align 16 + %vecext = extractelement <16 x i8> %0, i32 7 + %conv = sext i8 %vecext to i32 + ret i32 %conv +} + +define i32 @extract_i16_zext(<8 x i16>* %V) { +; P5600-LABEL: extract_i16_zext: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.h $w0, 0($4) +; P5600-NEXT: copy_u.h $2, $w0[7] +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <8 x i16>, <8 x i16>* %V, align 16 + %vecext = extractelement <8 x i16> %0, i32 7 + %conv = zext i16 %vecext to i32 + ret i32 %conv +} + +define i8 @extract_i8_anyext(<16 x i8>* %V) { +; P5600-LABEL: extract_i8_anyext: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.b $w0, 0($4) +; P5600-NEXT: copy_s.b $2, $w0[7] +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <16 x i8>, <16 x i8>* %V, align 16 + %vecext = extractelement <16 x i8> %0, i32 7 + ret i8 %vecext +} + +define i32 @extract_i32(<4 x i32>* %V) { +; P5600-LABEL: extract_i32: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.w $w0, 0($4) +; P5600-NEXT: copy_s.w $2, $w0[2] +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <4 x i32>, <4 x i32>* %V, align 16 + %vecext = extractelement <4 x i32> %0, i32 2 + ret i32 %vecext +} + +define void @extract_i8_combine_immune(<16 x i8>* %V, i32 %idx, i8* %pc) { +; 
P5600-LABEL: extract_i8_combine_immune: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.b $w0, 0($4) +; P5600-NEXT: splat.b $w0, $w0[$5] +; P5600-NEXT: # kill: def $f0 killed $f0 killed $w0 +; P5600-NEXT: mfc1 $1, $f0 +; P5600-NEXT: sra $1, $1, 24 +; P5600-NEXT: sb $1, 0($6) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <16 x i8>, <16 x i8>* %V, align 16 + %vecext = extractelement <16 x i8> %0, i32 %idx + store i8 %vecext, i8* %pc, align 1 + ret void +} Index: llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/extract_vector_elt_pre_isel.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/extract_vector_elt_pre_isel.mir @@ -0,0 +1,138 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=mips-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @extract_i8_sext(<16 x i8>* %V) { entry: ret void } + define void @extract_i16_zext(<8 x i16>* %V) { entry: ret void } + define void @extract_i8_anyext(<16 x i8>* %V) { entry: ret void } + define void @extract_i32(<4 x i32>* %V) { entry: ret void } + define void @extract_i8_combine_immune(<16 x i8>* %V, i32 %idx, i8* %pc) { entry: ret void } + +... 
+--- +name: extract_i8_sext +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i8_sext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_SEXT_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[C]](s32) + ; P5600: $v0 = COPY [[SEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %4:_(s32) = G_CONSTANT i32 7 + %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2(<16 x s8>), %4(s32) + %5:_(s32) = G_SEXT %3(s8) + $v0 = COPY %5(s32) + RetRA implicit $v0 + +... +--- +name: extract_i16_zext +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i16_zext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_ZEXT_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s16>), [[C]](s32) + ; P5600: $v0 = COPY [[ZEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %4:_(s32) = G_CONSTANT i32 7 + %2:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %3:_(s16) = G_EXTRACT_VECTOR_ELT %2(<8 x s16>), %4(s32) + %5:_(s32) = G_ZEXT %3(s16) + $v0 = COPY %5(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i8_anyext +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i8_anyext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[C]](s32) + ; P5600: $v0 = COPY [[EVEC]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %3:_(s32) = G_CONSTANT i32 7 + %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:_(s8) = G_EXTRACT_VECTOR_ELT %1(<16 x s8>), %3(s32) + %4:_(s32) = G_ANYEXT %2(s8) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... +--- +name: extract_i32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i32 + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s32>), [[C]](s32) + ; P5600: $v0 = COPY [[EVEC]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %3:_(s32) = G_CONSTANT i32 2 + %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:_(s32) = G_EXTRACT_VECTOR_ELT %1(<4 x s32>), %3(s32) + $v0 = COPY %2(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i8_combine_immune +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: extract_i8_combine_immune + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[COPY1]](s32) + ; P5600: G_STORE [[EVEC]](s8), [[COPY2]](p0) :: (store 1 into %ir.pc) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(s32) = COPY $a1 + %2:_(p0) = COPY $a2 + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:_(s8) = G_EXTRACT_VECTOR_ELT %3(<16 x s8>), %1(s32) + G_STORE %4(s8), %2(p0) :: (store 1 into %ir.pc) + RetRA + +... Index: llvm/test/CodeGen/Mips/GlobalISel/regbankselect/extract_vector_elt_pre_isel.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/regbankselect/extract_vector_elt_pre_isel.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @extract_i8_sext(<16 x i8>* %V) { entry: ret void } + define void @extract_i16_zext(<8 x i16>* %V) { entry: ret void } + define void @extract_i8_anyext(<16 x i8>* %V) { entry: ret void } + define void @extract_i32(<4 x i32>* %V) { entry: ret void } + define void @extract_i8_combine_immune(<16 x i8>* %V, i32 %idx, i8* %pc) { entry: ret void } + +... 
+--- +name: extract_i8_sext +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i8_sext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[C]](s32) + ; P5600: $v0 = COPY [[SEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %4:_(s32) = G_CONSTANT i32 7 + %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %5:_(s32) = G_SEXT_EXTRACT_VECTOR_ELT %2(<16 x s8>), %4(s32) + $v0 = COPY %5(s32) + RetRA implicit $v0 + +... +--- +name: extract_i16_zext +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: extract_i16_zext + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:gprb(s32) = G_ZEXT_EXTRACT_VECTOR_ELT [[LOAD]](<8 x s16>), [[C]](s32) + ; P5600: $v0 = COPY [[ZEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %4:_(s32) = G_CONSTANT i32 7 + %2:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %5:_(s32) = G_ZEXT_EXTRACT_VECTOR_ELT %2(<8 x s16>), %4(s32) + $v0 = COPY %5(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i8_anyext +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i8_anyext + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[C]](s32) + ; P5600: $v0 = COPY [[SEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %3:_(s32) = G_CONSTANT i32 7 + %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %4:_(s32) = G_EXTRACT_VECTOR_ELT %1(<16 x s8>), %3(s32) + $v0 = COPY %4(s32) + RetRA implicit $v0 + +... +--- +name: extract_i32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; P5600-LABEL: name: extract_i32 + ; P5600: liveins: $a0 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 2 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT [[LOAD]](<4 x s32>), [[C]](s32) + ; P5600: $v0 = COPY [[SEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: RetRA implicit $v0 + %0:_(p0) = COPY $a0 + %3:_(s32) = G_CONSTANT i32 2 + %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %2:_(s32) = G_EXTRACT_VECTOR_ELT %1(<4 x s32>), %3(s32) + $v0 = COPY %2(s32) + RetRA implicit $v0 + +... 
+--- +name: extract_i8_combine_immune +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: extract_i8_combine_immune + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.V) + ; P5600: [[SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:gprb(s32) = G_SEXT_EXTRACT_VECTOR_ELT [[LOAD]](<16 x s8>), [[COPY1]](s32) + ; P5600: [[COPY3:%[0-9]+]]:gprb(s32) = COPY [[SEXT_EXTRACT_VECTOR_ELT]](s32) + ; P5600: G_STORE [[COPY3]](s32), [[COPY2]](p0) :: (store 1 into %ir.pc) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(s32) = COPY $a1 + %2:_(p0) = COPY $a2 + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.V) + %6:_(s32) = G_EXTRACT_VECTOR_ELT %3(<16 x s8>), %1(s32) + %5:_(s32) = COPY %6(s32) + G_STORE %5(s32), %2(p0) :: (store 1 into %ir.pc) + RetRA + +... Index: llvm/utils/TableGen/CodeGenRegisters.cpp =================================================================== --- llvm/utils/TableGen/CodeGenRegisters.cpp +++ llvm/utils/TableGen/CodeGenRegisters.cpp @@ -1008,7 +1008,10 @@ if (SuperRegRCsBV[RC.EnumValue]) SuperRegRCs.emplace_back(&RC); llvm::sort(SuperRegRCs, SizeOrder); - assert(SuperRegRCs.front() == BiggestSuperRegRC && "Biggest class wasn't first"); + // It's ok to have a few classes with same size as BiggestSuperRegRC. + assert(SuperRegRCs.front()->getMembers().size() == + BiggestSuperRegRC->getMembers().size() && + "Biggest class wasn't first"); // Find all the subreg classes and order them by size too. std::vector> SuperRegClasses; @@ -1051,11 +1054,27 @@ // LOW32_ADDR_ACCESS_RBP is a valid choice but contains registers that // aren't subregisters of SuperRegRC whereas GR32 has a direct 1:1 // mapping. + + // Mips aliases same physical registers on a few different classes for + // vectors. 
They hold different vector types of same total size in bits. + // They all have same subclasses, and we want to return + // std::make_pair(BiggestSuperRegRC, fgr32) + if ((ChosenSuperRegClass->getMembers().size() == + BiggestSuperRegRC->getMembers().size()) && + (ChosenSuperRegClass != BiggestSuperRegRC)) + continue; + if (SuperRegRC->getMembers().size() >= SubRegRC->getMembers().size()) return std::make_pair(ChosenSuperRegClass, SubRegRC); } } + if (ChosenSuperRegClass) + if ((ChosenSuperRegClass->getMembers().size() == + BiggestSuperRegRC->getMembers().size()) && + (ChosenSuperRegClass != BiggestSuperRegRC)) + continue; + // If we found a fit but it wasn't quite ideal because SubRegRC had excess // registers, then we're done. if (ChosenSuperRegClass) Index: llvm/utils/TableGen/GlobalISelEmitter.cpp =================================================================== --- llvm/utils/TableGen/GlobalISelEmitter.cpp +++ llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -1256,6 +1256,30 @@ } }; +class ScalarOfPtrSizeMatcher : public OperandPredicateMatcher { +protected: + unsigned SizeInBits; + +public: + ScalarOfPtrSizeMatcher(unsigned InsnVarID, unsigned OpIdx, + unsigned SizeInBits) + : OperandPredicateMatcher(OPM_PointerToAny, InsnVarID, OpIdx), + SizeInBits(SizeInBits) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_PointerToAny; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckScalarOfPtrSize") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("SizeInBits") + << MatchTable::IntValue(SizeInBits) << MatchTable::LineBreak; + } +}; + /// Generates code to check that an operand is a particular target constant. 
class ComplexPatternOperandMatcher : public OperandPredicateMatcher { protected: @@ -1590,8 +1614,11 @@ if (!VTy.isMachineValueType()) return failedImport("unsupported typeset"); - if (VTy.getMachineValueType() == MVT::iPTR && OperandIsAPointer) { - addPredicate(0); + if (VTy.getMachineValueType() == MVT::iPTR) { + if (OperandIsAPointer) + addPredicate(0); + else + addPredicate(0); return Error::success(); } @@ -2538,6 +2565,31 @@ } }; +class TempSubRegRenderer : public OperandRenderer { +protected: + unsigned InsnID; + unsigned TempRegID; + const CodeGenSubRegIndex *SRI; + +public: + TempSubRegRenderer(unsigned InsnID, unsigned TempRegID, + const CodeGenSubRegIndex *SRI) + : OperandRenderer(OR_Register), InsnID(InsnID), TempRegID(TempRegID), + SRI(SRI) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_TempRegister; + } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_CopyTmpSubReg") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID) + << MatchTable::Comment("SubRegIdx") + << MatchTable::IntValue(SRI->EnumValue); + Table << MatchTable::LineBreak; + } +}; /// Adds a specific physical register to the instruction being built. /// This is typically useful for WZR/XZR on AArch64. class AddRegisterRenderer : public OperandRenderer { @@ -3236,7 +3288,7 @@ const RecordKeeper &RK; const CodeGenDAGPatterns CGP; const CodeGenTarget &Target; - CodeGenRegBank CGRegs; + CodeGenRegBank &CGRegs; /// Keep track of the equivalence between SDNodes and Instruction by mapping /// SDNodes to the GINodeEquiv mapping. 
We need to map to the GINodeEquiv to @@ -3444,7 +3496,7 @@ GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK) : RK(RK), CGP(RK), Target(CGP.getTargetInfo()), - CGRegs(RK, Target.getHwModes()) {} + CGRegs(CGP.getTargetInfo().getRegBank()) {} //===- Emitter ------------------------------------------------------------===// @@ -4241,32 +4293,87 @@ // EXTRACT_SUBREG needs to use a subregister COPY. if (Name == "EXTRACT_SUBREG") { - if (!Dst->getChild(0)->isLeaf()) - return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); + if (!Dst->getChild(1)->isLeaf()) + return failedImport("EXTRACT_SUBREG child #2?(the one with subindex) is not a leaf, SDNodeXForm?"); if (DefInit *SubRegInit = dyn_cast(Dst->getChild(1)->getLeafValue())) { - Record *RCDef = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); - if (!RCDef) - return failedImport("EXTRACT_SUBREG child #0 could not " - "be coerced to a register class"); - - CodeGenRegisterClass *RC = CGRegs.getRegClass(RCDef); - CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); - - const auto &SrcRCDstRCPair = - RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx); - if (SrcRCDstRCPair.hasValue()) { - assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); - if (SrcRCDstRCPair->first != RC) - return failedImport("EXTRACT_SUBREG requires an additional COPY"); - } - DstMIBuilder.addRenderer(Dst->getChild(0)->getName(), - SubIdx); - return InsertPt; - } + if (Dst->getChild(0)->isLeaf()) { + Record *RCDef = + getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (!RCDef) + return failedImport("EXTRACT_SUBREG child #0 could not " + "be coerced to a register class"); + + CodeGenRegisterClass *RC = CGRegs.getRegClass(RCDef); + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + + const auto &SrcRCDstRCPair = + RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx); + if (SrcRCDstRCPair.hasValue()) { + assert(SrcRCDstRCPair->second && "Couldn't find a matching 
subclass"); + if (SrcRCDstRCPair->first != RC) + return failedImport("EXTRACT_SUBREG requires an additional COPY"); + } + + DstMIBuilder.addRenderer( + Dst->getChild(0)->getName(), SubIdx); + return InsertPt; + } else { + Record *RCDef = nullptr; + for (const RecordVal &Val : + Dst->getChild(0)->getOperator()->getValues()) { + if (Val.getNameInitAsString() == "OutOperandList") { + DagInit *Val_DagInit = static_cast(Val.getValue()); + if (Val_DagInit->getNumArgs() != 1) + return failedImport("EXTRACT_SUBREG has more than 1 outs"); + if (DefInit *VDefInit = dyn_cast(Val_DagInit->getArg(0))) { + if (VDefInit->getDef()->isSubClassOf("RegisterOperand")) + RCDef = VDefInit->getDef()->getValueAsDef("RegClass"); + } + } + } + if (!RCDef) + return failedImport("EXTRACT_SUBREG child #0 could not " + "be coerced to a register class"); + + CodeGenRegisterClass *RC = CGRegs.getRegClass(RCDef); + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + const auto &SrcRCDstRCPair = + RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx); + if (SrcRCDstRCPair.hasValue()) { + assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); + if (SrcRCDstRCPair->first != RC) + return failedImport( + "EXTRACT_SUBREG requires an additional COPY, but it's more " + "likely that we got wrong class..."); + } + + ArrayRef ChildTypes = Dst->getChild(0)->getExtTypes(); + if (ChildTypes.size() != 1) + return failedImport("Dst pattern child has multiple results"); + + Optional OpTyOrNone = None; + if (ChildTypes.front().isMachineValueType()) + OpTyOrNone = + MVTToLLT(ChildTypes.front().getMachineValueType().SimpleTy); + if (!OpTyOrNone) + return failedImport("Dst operand has an unsupported type"); + + unsigned TempRegID = M.allocateTempRegID(); + InsertPt = M.insertAction( + InsertPt, OpTyOrNone.getValue(), TempRegID); + DstMIBuilder.addRenderer(TempRegID, SubIdx); + + auto InsertPtOrError = createAndImportSubInstructionRenderer( + ++InsertPt, M, Dst->getChild(0), 
TempRegID); + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + return InsertPtOrError.get(); + } + } return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); } @@ -4639,12 +4746,23 @@ if (DstIOpRec == nullptr) return failedImport("REG_SEQUENCE operand #0 isn't a register class"); } else if (DstIName == "EXTRACT_SUBREG") { - if (!Dst->getChild(0)->isLeaf()) - return failedImport("EXTRACT_SUBREG operand #0 isn't a leaf"); - - // We can assume that a subregister is in the same bank as it's super - // register. - DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (!Dst->getChild(0)->isLeaf()) { + DstIOpRec = nullptr; + for (const RecordVal &Val : + Dst->getChild(0)->getOperator()->getValues()) { + if (Val.getNameInitAsString() == "OutOperandList") { + DagInit *Val_DagInit = static_cast(Val.getValue()); + if (Val_DagInit->getNumArgs() != 1) + return failedImport("EXTRACT_SUBREG has more than 1 outs"); + if (DefInit *VDefInit = dyn_cast(Val_DagInit->getArg(0))) + if (VDefInit->getDef()->isSubClassOf("RegisterOperand")) + DstIOpRec = VDefInit->getDef()->getValueAsDef("RegClass"); + } + } + } else + // We can assume that a subregister is in the same bank as its super + // register. + DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); if (DstIOpRec == nullptr) return failedImport("EXTRACT_SUBREG operand #0 isn't a register class"); @@ -4742,6 +4860,9 @@ const auto &SrcRCDstRCPair = (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx); + if (!SrcRCDstRCPair.hasValue()) + return failedImport( + "EXTRACT_SUBREG didn't find classes for constraints?\n"); assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); M.addAction(0, 0, *SrcRCDstRCPair->second); M.addAction(0, 1, *SrcRCDstRCPair->first);