Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -75,6 +75,7 @@
                                unsigned DstVec, unsigned InsReg,
                                const RegisterBank &RB,
                                MachineIRBuilder &MIRBuilder) const;
+  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -1716,6 +1717,8 @@
     return selectShuffleVector(I, MRI);
   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
     return selectExtractElt(I, MRI);
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    return selectInsertElt(I, MRI);
   }
 
   return false;
@@ -2331,6 +2334,50 @@
   return InsElt;
 }
 
+bool AArch64InstructionSelector::selectInsertElt(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+
+  // Get information on the destination.
+  unsigned DstReg = I.getOperand(0).getReg();
+  const LLT DstTy = MRI.getType(DstReg);
+  if (DstTy.getSizeInBits() < 128) {
+    // TODO: Handle unpacked vectors.
+    LLVM_DEBUG(dbgs() << "Unpacked vectors not supported yet!");
+    return false;
+  }
+
+  // Get information on the element we want to insert into the destination.
+  unsigned EltReg = I.getOperand(2).getReg();
+  const LLT EltTy = MRI.getType(EltReg);
+  unsigned EltSize = EltTy.getSizeInBits();
+  if (EltSize < 16 || EltSize > 64)
+    return false; // Don't support all element types yet.
+
+  // Find the definition of the index. Bail out if it's not defined by a
+  // G_CONSTANT.
+  unsigned IdxReg = I.getOperand(3).getReg();
+  unsigned LaneIdx = 0;
+  if (!getConstantValueForReg(IdxReg, MRI, LaneIdx))
+    return false;
+
+  // Perform the lane insert.
+  unsigned SrcVec = I.getOperand(1).getReg();
+  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+  MachineIRBuilder MIRBuilder(I);
+  MachineInstr *InsMI =
+      &*emitLaneInsert(LaneIdx, EltSize, SrcVec, EltReg, EltRB, MIRBuilder);
+
+  // Re-jigger the insert instruction so that the result is an insert into the
+  // destination.
+  InsMI->getOperand(0).setReg(DstReg);
+
+  // Make sure that the destination is constrained at the end.
+  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::selectBuildVector(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
Index: llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -450,6 +450,12 @@
             VecTy == v2s64 || VecTy == v2s32;
       });
 
+  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+      .legalIf([=](const LegalityQuery &Query) {
+        return Query.Types[0].isVector() &&
+               Query.Types[0].getElementType() == Query.Types[1];
+      });
+
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
       .legalFor({{v4s16, s16},
                  {v8s16, s16},
Index: llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -697,7 +697,19 @@
 
     // Index needs to be a GPR.
     OpRegBankIdx[2] = PMI_FirstGPR;
     break;
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    OpRegBankIdx[0] = PMI_FirstFPR;
+    OpRegBankIdx[1] = PMI_FirstFPR;
+
+    // The element may be either a GPR or FPR. Preserve that behaviour.
+    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
+      OpRegBankIdx[2] = PMI_FirstFPR;
+    else
+      OpRegBankIdx[2] = PMI_FirstGPR;
+    // Index needs to be a GPR.
+    OpRegBankIdx[3] = PMI_FirstGPR;
+    break;
   case TargetOpcode::G_BUILD_VECTOR:
     // If the first source operand belongs to a FPR register bank, then make
     // sure that we preserve that.
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -307,7 +307,7 @@
 # DEBUG: .. type index coverage check SKIPPED: no rules defined
 #
 # DEBUG-NEXT: G_INSERT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices
-# DEBUG: .. type index coverage check SKIPPED: no rules defined
+# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected
 #
 # DEBUG-NEXT: G_EXTRACT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices
 # DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected
Index: llvm/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=regbankselect %s -o - | FileCheck %s
+
+# The following should hold here:
+#
+# 1) The first and second operands of G_INSERT_VECTOR_ELT should be FPRs since
+#    they are vectors.
+#
+# 2) The third operand should be on the register bank given in the test name
+#    (e.g., v4s32_fpr). AArch64 supports native inserts of GPRs, so we need to
+#    preserve that.
+#
+# 3) The fourth operand should be a GPR, since it's a constant.
+
+name: v4s32_fpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q1, $s0
+
+    ; CHECK-LABEL: name: v4s32_fpr
+    ; CHECK: liveins: $q1, $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(<4 x s32>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32)
+    ; CHECK: $q0 = COPY [[IVEC]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $s0
+    %1:_(<4 x s32>) = COPY $q1
+    %3:_(s32) = G_CONSTANT i32 1
+    %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: v4s32_gpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: v4s32_gpr
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32)
+    ; CHECK: $q0 = COPY [[IVEC]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %3:_(s32) = G_CONSTANT i32 1
+    %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: v2s64_fpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $q1
+
+    ; CHECK-LABEL: name: v2s64_fpr
+    ; CHECK: liveins: $d0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s64>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s64), [[C]](s32)
+    ; CHECK: $q0 = COPY [[IVEC]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s64) = COPY $d0
+    %1:_(<2 x s64>) = COPY $q1
+    %3:_(s32) = G_CONSTANT i32 1
+    %2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s32)
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: v2s64_gpr
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $x0
+
+    ; CHECK-LABEL: name: v2s64_gpr
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s64), [[C]](s32)
+    ; CHECK: $q0 = COPY [[IVEC]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(s64) = COPY $x0
+    %1:_(<2 x s64>) = COPY $q0
+    %3:_(s32) = G_CONSTANT i32 0
+    %2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s32)
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=instruction-select %s -o - | FileCheck %s
+
+name: v4s32_fpr
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q1, $s0
+
+    ; CHECK-LABEL: name: v4s32_fpr
+    ; CHECK: liveins: $q1, $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
+    ; CHECK: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[COPY1]], 1, [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY [[INSvi32lane]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(<4 x s32>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 1
+    %2:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: v4s32_gpr
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: v4s32_gpr
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[COPY1]], 1, [[COPY]]
+    ; CHECK: $q0 = COPY [[INSvi32gpr]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:gpr(s32) = COPY $w0
+    %1:fpr(<4 x s32>) = COPY $q0
+    %3:gpr(s32) = G_CONSTANT i32 1
+    %2:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: v2s64_fpr
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $q1
+
+    ; CHECK-LABEL: name: v2s64_fpr
+    ; CHECK: liveins: $d0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[COPY1]], 1, [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY [[INSvi64lane]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(<2 x s64>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 1
+    %2:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s32)
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: v2s64_gpr
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $x0
+
+    ; CHECK-LABEL: name: v2s64_gpr
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[INSvi64gpr:%[0-9]+]]:fpr128 = INSvi64gpr [[COPY1]], 0, [[COPY]]
+    ; CHECK: $q0 = COPY [[INSvi64gpr]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:gpr(s64) = COPY $x0
+    %1:fpr(<2 x s64>) = COPY $q0
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %2:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s32)
+    $q0 = COPY %2(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
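Note (not part of the patch): below is a minimal MIR sketch of an input that the new selector deliberately rejects. The lane index is defined by a COPY rather than a G_CONSTANT, so selectInsertElt returns false and the instruction is left unselected, just like the unpacked-vector and sub-16-bit-element cases covered by the early returns. The function name and register choices are illustrative only; this case is not exercised by the tests above.

# Illustrative sketch only; not covered by the tests added in this patch.
---
name: v4s32_variable_idx
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $q0, $w0, $w1

    %0:gpr(s32) = COPY $w0
    %1:fpr(<4 x s32>) = COPY $q0
    ; The index is a run-time value, not a G_CONSTANT, so selection bails out.
    %3:gpr(s32) = COPY $w1
    %2:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
    $q0 = COPY %2(<4 x s32>)
    RET_ReallyLR implicit $q0
...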