Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -2561,11 +2561,7 @@
   // Get information on the destination.
   unsigned DstReg = I.getOperand(0).getReg();
   const LLT DstTy = MRI.getType(DstReg);
-  if (DstTy.getSizeInBits() < 128) {
-    // TODO: Handle unpacked vectors.
-    LLVM_DEBUG(dbgs() << "Unpacked vectors not supported yet!");
-    return false;
-  }
+  unsigned VecSize = DstTy.getSizeInBits();
 
   // Get information on the element we want to insert into the destination.
   unsigned EltReg = I.getOperand(2).getReg();
@@ -2585,7 +2581,50 @@
   unsigned SrcReg = I.getOperand(1).getReg();
   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
   MachineIRBuilder MIRBuilder(I);
-  emitLaneInsert(DstReg, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
+
+  if (VecSize < 128) {
+    // If the vector we're inserting into is smaller than 128 bits, widen it
+    // to 128 to do the insert.
+    MachineInstr *ScalarToVec = emitScalarToVector(
+        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
+    if (!ScalarToVec)
+      return false;
+    SrcReg = ScalarToVec->getOperand(0).getReg();
+  }
+
+  // Create an insert into a new FPR128 register.
+  // Note that if our vector is already 128 bits, we end up emitting an extra
+  // register.
+  MachineInstr *InsMI =
+      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
+
+  if (VecSize < 128) {
+    // If we had to widen to perform the insert, then we have to demote back to
+    // the original size to get the result we want.
+    unsigned DemoteVec = InsMI->getOperand(0).getReg();
+    const TargetRegisterClass *RC =
+        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
+    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
+      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
+      return false;
+    }
+    unsigned SubReg = 0;
+    if (!getSubRegForClass(RC, TRI, SubReg))
+      return false;
+    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
+      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
+                        << ")\n");
+      return false;
+    }
+    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+        .addReg(DemoteVec, 0, SubReg);
+    RBI.constrainGenericRegister(DstReg, *RC, MRI);
+  } else {
+    // No widening needed.
+    InsMI->getOperand(0).setReg(DstReg);
+    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
+  }
+
   I.eraseFromParent();
   return true;
 }
Index: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -448,9 +448,8 @@
   getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
       .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
-        // TODO: Support destination sizes of < 128 bits.
        // TODO: Support s8 and s16
-        return VecTy == v4s32 || VecTy == v2s64;
+        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });
 
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir
@@ -111,3 +111,53 @@
     RET_ReallyLR implicit $q0
 
 ...
+---
+name:            v2s32_fpr
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d1, $s0
+
+    ; CHECK-LABEL: name: v2s32_fpr
+    ; CHECK: liveins: $d1, $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32)
+    ; CHECK: $d0 = COPY [[IVEC]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(s32) = COPY $s0
+    %1:_(<2 x s32>) = COPY $d1
+    %3:_(s32) = G_CONSTANT i32 1
+    %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v2s32_gpr
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: v2s32_gpr
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s32>) = COPY $d0
+    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32)
+    ; CHECK: $d0 = COPY [[IVEC]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(s32) = COPY $w0
+    %1:_(<2 x s32>) = COPY $d0
+    %3:_(s32) = G_CONSTANT i32 1
+    %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
@@ -104,3 +104,61 @@
     RET_ReallyLR implicit $q0
 
 ...
+---
+name:            v2s32_fpr
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d1, $s0
+
+    ; CHECK-LABEL: name: v2s32_fpr
+    ; CHECK: liveins: $d1, $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.dsub
+    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY]], %subreg.ssub
+    ; CHECK: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi32lane]].dsub
+    ; CHECK: $d0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(<2 x s32>) = COPY $d1
+    %3:gpr(s32) = G_CONSTANT i32 1
+    %2:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v2s32_gpr
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: v2s32_gpr
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.dsub
+    ; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
+    ; CHECK: $d0 = COPY [[COPY2]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:gpr(s32) = COPY $w0
+    %1:fpr(<2 x s32>) = COPY $d0
+    %3:gpr(s32) = G_CONSTANT i32 1
+    %2:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
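
As an end-to-end illustration (a hypothetical example, not taken from the patch), IR of the
following shape now legalizes to a <2 x s32> G_INSERT_VECTOR_ELT and is selected through the
INSvi32gpr path tested above, where previously the selector rejected sub-128-bit vectors and
codegen fell back. The final code should be roughly "mov v0.s[1], w0" followed by the return.

; Hypothetical IR test case: insert a GPR i32 into lane 1 of a 64-bit vector.
define <2 x i32> @insert_v2i32(<2 x i32> %vec, i32 %elt) {
  %res = insertelement <2 x i32> %vec, i32 %elt, i32 1
  ret <2 x i32> %res
}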
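
When the element is instead already in an FPR (again a hypothetical example, not from the
patch), selection takes the INSvi32lane path: widen the vector and the element to FPR128,
insert at 128 bits, then demote the result through the dsub subregister. The final code
should be roughly "mov v0.s[1], v1.s[0]".

; Hypothetical IR test case: insert an FPR float into lane 1 of a 64-bit vector.
define <2 x float> @insert_v2f32(<2 x float> %vec, float %elt) {
  %res = insertelement <2 x float> %vec, float %elt, i32 1
  ret <2 x float> %res
}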